Setup
load sero-epi functions
source("seroepi_functions.R")
set colour palettes
# source: https://davidmathlogic.com/colorblind/#%23D81B60-%231E88E5-%23FFC107-%23004D40
region_cols <- c(`Southern Asia` ="#D81B60", `Western Africa`="#1E88E5", `Eastern Africa`="#FFC107", `Southern Africa`="#81B1A9", Global="black")
group_cols <- c(All="black", Fatal ="#D81B60", ESBL="#1E88E5", CP="#FFC107")
load data on included samples - Table S3
data_NNS_sites10 <- read_tsv("tables/TableS3_IsolatesIncluded_NEEDSACCESSIONS.tsv") %>%
mutate(Study=if_else(Study=="GBS", "GBS-COP", Study))
## Rows: 1930 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (29): Genome Name, Study, Region, Country, Cluster, neonatal, K_locus, K...
## dbl (7): Year, N50, contig_count, total_size, resistance_score, Mortality, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
K locus analysis
read posterior estimates for adjusted counts (main) and raw counts
(for comparison)
# read adjusted Bayesian estimates for K
kbayes <- parseModelledEstimates(global_post="outputs_core/K_Full_ALL_adj_28_posterior_global.csv.gz", region_post="outputs_core/K_Full_ALL_adj_28_posterior_subgroup.csv.gz")
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
# read raw Bayesian estimates for K
kbayes_raw <- parseModelledEstimates(global_post="outputs_core/K_Full_ALL_raw_28_posterior_global.csv.gz", region_post="outputs_core/K_Full_ALL_raw_28_posterior_subgroup.csv.gz")
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
global and regional prevalence - top20 K
k_prev_global_dist <- locus_ridgesplot(posterior=kbayes$global_post,
ranks=kbayes$locus_rank,
lines_every=10,
xlim=c(0,12), xbreaks=seq(0,12,1), scale=1.5,
title = "a) Global estimates")
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.0717
## Warning: Removed 46 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

k_prev_region_heatmap <- region_heatmap(kbayes$region_est, kbayes$global_est,
ranks=kbayes$locus_rank, median=F, title="b) Point estimates")
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_tile()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_text()`).

fig 2
k_prev_global_dist + k_prev_region_heatmap
## Picking joint bandwidth of 0.0717
## Warning: Removed 46 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_tile()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_text()`).

ggsave("figures/Fig2_K_global_regional_Kadj.pdf", width=8, height=6)
## Picking joint bandwidth of 0.0717
## Warning: Removed 46 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_tile()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_text()`).
ggsave("figures/Fig2_K_global_regional_Kadj.png", width=8, height=6)
## Picking joint bandwidth of 0.0717
## Warning: Removed 46 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_tile()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_text()`).
global and regional prevalence - all K
k_prev_global_dist_all <- locus_ridgesplot(posterior=kbayes$global_post,
ranks=kbayes$locus_rank,
maxRank = 90, axis_font_size=8,
lines_every=10,
xlim=c(0,12), xbreaks=seq(0,12,1), scale=3, title="a) Global estimates")
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.04
## Warning: Removed 46 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

k_prev_region_heatmap_all <- region_heatmap(kbayes$region_est, kbayes$global_est,
ranks=kbayes$locus_rank,
maxRank = 90, axis_font_size=8, median=F, title="b) Point estimates")

logistic regression of K vs year + region
result <- c()
for (k in kbayes$locus_rank$locus[1:30]) {
d <- data_NNS_sites10 %>%
mutate(k=if_else(K_locus==k, 1, 0)) %>%
select(k, Year, Region)
m <- summary(logistf(k~Year+Region, data=d))
result <- rbind(result, cbind(coef=m$coefficients, p=m$prob, locus=k))
}
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -37.88700712 62.94467272 -161.20903016 86.48590232
## Year 0.01803655 0.03119787 -0.04361092 0.07915654
## RegionSouthern Africa -1.10817272 0.26921197 -1.66752642 -0.60562423
## RegionSouthern Asia -2.84833150 0.40634815 -3.75158569 -2.13078305
## RegionWestern Africa -1.29017996 0.49177143 -2.41049936 -0.43538436
## Chisq p method
## (Intercept) 0.3591336 5.489878e-01 2
## Year 0.3313495 5.648655e-01 2
## RegionSouthern Africa 20.8012324 5.095017e-06 2
## RegionSouthern Asia Inf 0.000000e+00 2
## RegionWestern Africa 9.8847074 1.666580e-03 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=129.0551 on 4 df, p=0, n=1930
## Wald test = 634.4304 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 130.000000000 42.35322992 -370.0000000 630.0000000
## Year -0.064612245 0.02099325 -0.3126068 0.1829403
## RegionSouthern Africa -0.005222793 0.15315520 -9.9309302 2.2267548
## RegionSouthern Asia -0.136507533 0.11892282 -10.9495878 1.4778546
## RegionWestern Africa -0.115089058 0.24308356 -26.1063908 4.8158145
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 1.454453 2.278148e-01 2
## RegionSouthern Asia 52.980701 3.368417e-13 2
## RegionWestern Africa 4.038985 4.446057e-02 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-332.7095 on 4 df, p=1, n=1930
## Wald test = 104.6445 on 4 df, p = 0logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 13.293360660 100.51294006 -184.4396000 214.10728127
## Year -0.008099124 0.04982077 -0.1076478 0.08989806
## RegionSouthern Africa 1.561731650 0.24805532 1.0763103 2.05431012
## RegionSouthern Asia -0.757212766 0.35535994 -1.5072348 -0.09533611
## RegionWestern Africa -0.862409383 0.83530080 -3.0509975 0.46804954
## Chisq p method
## (Intercept) 0.01716419 0.89576566 2
## Year 0.02593953 0.87204809 2
## RegionSouthern Africa Inf 0.00000000 2
## RegionSouthern Asia 5.10052299 0.02391863 2
## RegionWestern Africa 1.38804125 0.23873616 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=66.95806 on 4 df, p=9.947598e-14, n=1930
## Wald test = 706.3714 on 4 df, p = 0logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -5.836391670 68.60219601 -141.05894975 129.13571265
## Year 0.001570549 0.03400289 -0.06533417 0.06858862
## RegionSouthern Africa -2.424355266 0.83115117 -4.60668849 -1.11156195
## RegionSouthern Asia 1.540401523 0.17270952 1.20440845 1.88320180
## RegionWestern Africa -0.725214512 0.65381614 -2.31059404 0.36532572
## Chisq p method
## (Intercept) 0.007184999 9.324487e-01 2
## Year 0.002117681 9.632957e-01 2
## RegionSouthern Africa 19.178123536 1.190702e-05 2
## RegionSouthern Asia Inf 0.000000e+00 2
## RegionWestern Africa 1.521914247 2.173300e-01 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=147.8983 on 4 df, p=0, n=1930
## Wald test = 665.7401 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 81.81519927 -630.0000000 370.0000000
## Year 0.06305563 0.04054606 -0.1851299 0.3105408
## RegionSouthern Africa 0.29538106 0.26948902 -2.0553070 2.6780374
## RegionSouthern Asia 0.12550575 0.22392517 -1.8835321 2.2145361
## RegionWestern Africa 0.39566655 0.41869039 -4.9012053 3.4107789
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 28.299026 1.039483e-07 2
## RegionSouthern Asia 15.063837 1.039355e-04 2
## RegionWestern Africa 4.088582 4.317385e-02 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-24.24784 on 4 df, p=1, n=1930
## Wald test = 857.3845 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionWestern Africa exceeded. Try to increase the number of iterations by
## passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq
## (Intercept) -130.00000000 82.17544596 -630.0000000 370.0000000 0
## Year 0.06316023 0.04072477 -0.1849201 0.3107086 0
## RegionSouthern Africa -0.11865221 0.28135867 -4.6453979 2.4021033 0
## RegionSouthern Asia -0.58058645 0.25044206 -9.5774304 1.3155810 0
## RegionWestern Africa -0.54912634 0.55397731 -42.5317904 2.9448386 0
## p method
## (Intercept) 1 2
## Year 1 2
## RegionSouthern Africa 1 2
## RegionSouthern Asia 1 2
## RegionWestern Africa 1 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-36.60871 on 4 df, p=1, n=1930
## Wald test = 851.3438 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 113.34292036 -630.0000000 370.0000000
## Year 0.06260313 0.05616994 -0.1856303 0.3100522
## RegionSouthern Africa 0.06377154 0.43064858 -4.5265784 3.3025814
## RegionSouthern Asia 0.74218717 0.29006127 -0.6282241 3.8778357
## RegionWestern Africa 0.33087275 0.64094786 -8.6563226 4.2429066
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 2.348115 1.254346e-01 2
## RegionSouthern Asia 68.763252 1.110223e-16 2
## RegionWestern Africa 1.494815 2.214708e-01 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=0.0388931 on 4 df, p=0.9998133, n=1930
## Wald test = 708.9709 on 4 df, p = 0logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -9.287309e+01 150.19917308 -394.0382572 210.9756308
## Year 4.401260e-02 0.07443808 -0.1066044 0.1932379
## RegionSouthern Africa -1.668985e-05 0.55212092 -1.2093728 1.0171781
## RegionSouthern Asia -3.992089e-02 0.43784822 -0.9523869 0.8016919
## RegionWestern Africa 1.494779e+00 0.49911715 0.4141775 2.4189995
## Chisq p method
## (Intercept) 3.633107e-01 0.546673301 2
## Year 3.322166e-01 0.564356778 2
## RegionSouthern Africa 4.179225e-08 0.999836887 2
## RegionSouthern Asia 8.177574e-03 0.927945565 2
## RegionWestern Africa 6.802541e+00 0.009102825 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=7.490749 on 4 df, p=0.1121179, n=1930
## Wald test = 546.3554 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa exceeded. Try to increase the number of iterations by
## passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 130.00000000 95.90941917 -370.0000000 630.0000000
## Year -0.06581753 0.04754515 -0.3138735 0.1818001
## RegionSouthern Africa -0.72863282 0.43193324 -40.1044448 1.5512142
## RegionSouthern Asia -0.58153184 0.30071691 -7.7393960 1.1304430
## RegionWestern Africa -0.33267553 0.57723135 -13.7947501 2.5832885
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 49.074572 2.464140e-12 2
## RegionSouthern Asia 56.343948 6.084022e-14 2
## RegionWestern Africa 2.798747 9.433798e-02 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-4.102337 on 4 df, p=1, n=1930
## Wald test = 784.6969 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -86.56364422 184.50293426 -481.823818 277.1169491
## Year 0.03963728 0.09143892 -0.140649 0.2354744
## RegionSouthern Africa 0.31768391 1.63625155 -4.677210 3.2789784
## RegionSouthern Asia 3.49603759 0.85205684 2.086662 5.7147520
## RegionWestern Africa 1.55807953 1.62129221 -3.428864 4.4986615
## Chisq p method
## (Intercept) 0.20476609 0.6509010 2
## Year 0.17451300 0.6761317 2
## RegionSouthern Africa 0.03524504 0.8510831 2
## RegionSouthern Asia Inf 0.0000000 2
## RegionWestern Africa 0.67652911 0.4107845 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=48.42003 on 4 df, p=7.714304e-10, n=1930
## Wald test = 297.622 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq p
## (Intercept) 130.00000000 78.04801956 -370.0000000 630.0000000 0 1
## Year -0.06577325 0.03869063 -0.3139322 0.1816988 0 1
## RegionSouthern Africa 0.18520086 0.29709283 -57.3074714 3.3731811 0 1
## RegionSouthern Asia 0.63212472 0.20477903 -3.0375828 6.2111558 0 1
## RegionWestern Africa 0.07147776 0.46816261 -46.7892963 19.9593452 0 1
## method
## (Intercept) 2
## Year 2
## RegionSouthern Africa 2
## RegionSouthern Asia 2
## RegionWestern Africa 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-116.5062 on 4 df, p=1, n=1930
## Wald test = 859.8824 on 4 df, p = 0logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -2.5289717314 163.3541682 -327.3014277 335.1864314
## Year -0.0006874461 0.0809675 -0.1681138 0.1602526
## RegionSouthern Africa -0.0547657083 0.5534894 -1.2668681 0.9693052
## RegionSouthern Asia -1.3429350411 0.6823067 -2.9727427 -0.1560814
## RegionWestern Africa 0.5352844642 0.6755352 -1.0819827 1.6977178
## Chisq p method
## (Intercept) 2.262995e-04 0.98799767 2
## Year 6.810321e-05 0.99341556 2
## RegionSouthern Africa 9.740342e-03 0.92138189 2
## RegionSouthern Asia 5.070673e+00 0.02433393 2
## RegionWestern Africa 5.433242e-01 0.46105832 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=6.847377 on 4 df, p=0.1441769, n=1930
## Wald test = 497.3219 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionWestern Africa exceeded. Try to increase the
## number of iterations by passing 'logistpl.control(maxit=...)' to parameter
## plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq p
## (Intercept) 130.00000000 134.22619840 -370.000000 630.000000 0 1
## Year -0.06653586 0.06654095 -0.314773 0.180839 0 1
## RegionSouthern Africa 2.95363495 0.33867139 2.177617 17.499040 Inf 0
## RegionSouthern Asia 0.45165127 0.41957356 -4.459098 14.069911 0 1
## RegionWestern Africa 0.32950543 0.85060573 -32.033487 14.407974 0 1
## method
## (Intercept) 2
## Year 2
## RegionSouthern Africa 2
## RegionSouthern Asia 2
## RegionWestern Africa 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=138.6139 on 4 df, p=0, n=1930
## Wald test = 551.9099 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 150.18908447 -630.0000000 278.9166630
## Year 0.06231615 0.07442974 -0.1405737 0.3098365
## RegionSouthern Africa -0.41165962 0.67317470 -3.8333338 1.2735644
## RegionSouthern Asia 0.78842620 0.37774531 -0.1097688 2.1725314
## RegionWestern Africa -0.26447518 1.08897622 -7.9268974 2.1026305
## Chisq p method
## (Intercept) 0.00000 1.000000e+00 2
## Year 0.00000 1.000000e+00 2
## RegionSouthern Africa 0.00000 1.000000e+00 2
## RegionSouthern Asia 33.14629 8.547941e-09 2
## RegionWestern Africa 0.00000 1.000000e+00 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=8.514077 on 4 df, p=0.07446169, n=1930
## Wald test = 547.9156 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 100.55350447 -630.0000000 370.0000000
## Year 0.06288445 0.04983212 -0.1852318 0.3104054
## RegionSouthern Africa -0.29650010 0.37953944 -11.5947192 2.8680071
## RegionSouthern Asia -0.19711050 0.28599828 -5.4876196 2.5693238
## RegionWestern Africa 0.28176425 0.50752231 -10.1203269 4.2209916
## Chisq p method
## (Intercept) 0.000000 1.0000000 2
## Year 0.000000 1.0000000 2
## RegionSouthern Africa 0.000000 1.0000000 2
## RegionSouthern Asia 0.000000 1.0000000 2
## RegionWestern Africa 2.333477 0.1266188 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-37.75674 on 4 df, p=1, n=1930
## Wald test = 780.8683 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia exceeded. Try to increase the number
## of iterations by passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq
## (Intercept) 130.00000000 131.07456157 -370.0000000 630.0000000 0
## Year -0.06629896 0.06497838 -0.3144327 0.1812064 0
## RegionSouthern Africa 0.64407943 0.42148886 -2.4845919 3.2026937 0
## RegionSouthern Asia 0.18928824 0.36855386 -3.9269408 3.5431429 0
## RegionWestern Africa 0.76517970 0.57302894 -5.2859816 5.1747592 0
## p method
## (Intercept) 1 2
## Year 1 2
## RegionSouthern Africa 1 2
## RegionSouthern Asia 1 2
## RegionWestern Africa 1 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-15.84383 on 4 df, p=1, n=1930
## Wald test = 655.9728 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionWestern Africa exceeded. Try to increase the
## number of iterations by passing 'logistpl.control(maxit=...)' to parameter
## plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 130.000000000 108.70208142 -370.0000000 630.0000000
## Year -0.066153380 0.05388737 -0.3143147 0.1813291
## RegionSouthern Africa -0.033681532 0.45180851 -40.8127398 3.4352329
## RegionSouthern Asia 0.695420328 0.28164204 -1.4916843 6.0597885
## RegionWestern Africa 0.001321003 0.68123142 -39.3723701 5.9850061
## Chisq p method
## (Intercept) 0.000000 1.0000000 2
## Year 0.000000 1.0000000 2
## RegionSouthern Africa 1.743079 0.1867492 2
## RegionSouthern Asia 0.000000 1.0000000 2
## RegionWestern Africa 0.000000 1.0000000 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-24.96682 on 4 df, p=1, n=1930
## Wald test = 732.9121 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -1.300000e+02 113.03460149 -630.0000000 370.0000000
## Year 6.273860e-02 0.05601728 -0.1853879 0.3102525
## RegionSouthern Africa -3.728792e-01 0.44620658 -13.2287359 2.5699286
## RegionSouthern Asia 4.646229e-03 0.30913211 -3.2410869 2.6855528
## RegionWestern Africa 1.485499e-01 0.61133533 -11.7871856 3.8904989
## Chisq p method
## (Intercept) 0.0000000 1.0000000 2
## Year 0.0000000 1.0000000 2
## RegionSouthern Africa 0.0000000 1.0000000 2
## RegionSouthern Asia 0.3399628 0.5598507 2
## RegionWestern Africa 0.7492826 0.3867035 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-24.08208 on 4 df, p=1, n=1930
## Wald test = 720.5392 on 4 df, p = 0logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 67.42616492 191.61169358 -318.6943552 470.4228275
## Year -0.03552461 0.09498226 -0.2353401 0.1558267
## RegionSouthern Africa -0.23958403 0.71929929 -1.9145684 1.0540654
## RegionSouthern Asia -0.60408646 0.61429099 -1.9958828 0.5249492
## RegionWestern Africa -0.74958430 1.42915330 -5.6030294 1.2752906
## Chisq p method
## (Intercept) 0.1142159 0.7353945 2
## Year 0.1290975 0.7193696 2
## RegionSouthern Africa 0.1134622 0.7362364 2
## RegionSouthern Asia 1.0633022 0.3024632 2
## RegionWestern Africa 0.3430248 0.5580884 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=1.85572 on 4 df, p=0.7622738, n=1930
## Wald test = 432.0288 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Asia exceeded. Try to increase the number of iterations by
## passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 97.1257619 -630.0000000 370.0000000
## Year 0.06287866 0.0481334 -0.1852788 0.3103755
## RegionSouthern Africa 0.02759329 0.3412474 -8.4368436 4.9140508
## RegionSouthern Asia 0.10724356 0.2636664 -3.6992784 4.7415529
## RegionWestern Africa 0.15826796 0.5349839 -22.5771802 5.9224465
## Chisq p method
## (Intercept) 0.000000 1.0000000000 2
## Year 0.000000 1.0000000000 2
## RegionSouthern Africa 2.205763 0.1374958471 2
## RegionSouthern Asia 13.072036 0.0002997371 2
## RegionWestern Africa 1.243564 0.2647852482 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-52.44613 on 4 df, p=1, n=1930
## Wald test = 801.223 on 4 df, p = 0logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 40.16497678 187.65434397 -336.3843993 434.6257289
## Year -0.02198032 0.09301729 -0.2175561 0.1646218
## RegionSouthern Africa 0.26081306 0.57750519 -0.9884602 1.3551362
## RegionSouthern Asia -1.03248446 0.69590913 -2.6843960 0.2005478
## RegionWestern Africa -0.82037254 1.42802169 -5.6727860 1.1984511
## Chisq p method
## (Intercept) 0.04241190 0.8368366 2
## Year 0.05172378 0.8200905 2
## RegionSouthern Africa 0.19340481 0.6600978 2
## RegionSouthern Asia 2.73361260 0.0982568 2
## RegionWestern Africa 0.42208547 0.5158983 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=4.147128 on 4 df, p=0.3864603, n=1930
## Wald test = 443.674 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia exceeded. Try to increase the number
## of iterations by passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 97.33432906 -630.0000000 370.000000
## Year 0.06291597 0.04823681 -0.1851852 0.310441
## RegionSouthern Africa -0.34211618 0.37426273 -27.7561357 3.881645
## RegionSouthern Asia -0.45603242 0.29941085 -43.7377732 2.975136
## RegionWestern Africa 1.25529523 0.35249581 -3.1626974 6.155618
## Chisq p method
## (Intercept) 0.00000 1.00000e+00 2
## Year 0.00000 1.00000e+00 2
## RegionSouthern Africa 0.00000 1.00000e+00 2
## RegionSouthern Asia 0.00000 1.00000e+00 2
## RegionWestern Africa 30.33557 3.63401e-08 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-29.15767 on 4 df, p=1, n=1930
## Wald test = 776.6436 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 79.26079190 -630.0000000 370.0000000
## Year 0.06308596 0.03928019 -0.1850915 0.3105714
## RegionSouthern Africa 0.82281458 0.23134895 -1.6159109 81.4641308
## RegionSouthern Asia 0.07893681 0.22038916 -6.9120299 157.9068728
## RegionWestern Africa 0.05212533 0.46369973 -47.6618229 94.4174770
## Chisq p method
## (Intercept) 0.0000000 1.0000000000 2
## Year 0.0000000 1.0000000000 2
## RegionSouthern Africa Inf 0.0000000000 2
## RegionSouthern Asia 17.1494083 0.0000345517 2
## RegionWestern Africa 0.6679147 0.4137795700 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-97.70746 on 4 df, p=1, n=1930
## Wald test = 848.6715 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 37.75387494 227.4609149 -430.9132528 525.4459673
## Year -0.02111177 0.1127486 -0.2629266 0.2111244
## RegionSouthern Africa -1.29173684 1.4713700 -6.1747336 0.9015368
## RegionSouthern Asia -0.02587252 0.6623794 -1.4935255 1.2481205
## RegionWestern Africa 1.48137361 0.7221376 -0.1998597 2.7963714
## Chisq p method
## (Intercept) 0.024391227 0.8758936 2
## Year 0.031056464 0.8601145 2
## RegionSouthern Africa 1.074239425 0.2999898 2
## RegionSouthern Asia 0.001448938 0.9696359 2
## RegionWestern Africa 3.101150142 0.0782370 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=4.971146 on 4 df, p=0.2902709, n=1930
## Wald test = 338.3153 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 127.28937147 215.2263317 -372.7106285 627.2893715
## Year -0.06556842 0.1066958 -0.3136308 0.1820858
## RegionSouthern Africa -0.40981472 1.0897222 -6.4813708 1.8397821
## RegionSouthern Asia 0.93212232 0.5504222 -0.2233164 2.6005360
## RegionWestern Africa 0.24025986 1.2879280 -5.4493903 2.7180379
## Chisq p method
## (Intercept) 0.000000 1.0000000 2
## Year 0.000000 1.0000000 2
## RegionSouthern Africa 1.611557 0.2042731 2
## RegionSouthern Asia 2.479686 0.1153253 2
## RegionWestern Africa 0.000000 1.0000000 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=3.7693 on 4 df, p=0.438127, n=1930
## Wald test = 370.1267 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 65.34167150 -630.0000000 370.000000
## Year 0.06327475 0.03238243 -0.1849337 0.310748
## RegionSouthern Africa -0.12595693 0.25215434 -44.8215034 185.227699
## RegionSouthern Asia 0.61534944 0.16881614 -1.2996132 186.488972
## RegionWestern Africa 0.02607048 0.39894575 -45.7886646 92.480010
## Chisq p method
## (Intercept) 0.0000000 1.0000000 2
## Year 0.0000000 1.0000000 2
## RegionSouthern Africa 0.0000000 1.0000000 2
## RegionSouthern Asia Inf 0.0000000 2
## RegionWestern Africa 0.4584689 0.4983404 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-138.2533 on 4 df, p=1, n=1930
## Wald test = 832.8149 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionWestern Africa exceeded. Try to increase the
## number of iterations by passing 'logistpl.control(maxit=...)' to parameter
## plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq p
## (Intercept) 130.00000000 119.2853913 -370.0000000 630.0000000 0 1
## Year -0.06625107 0.0591340 -0.3144155 0.1812235 0 1
## RegionSouthern Africa 0.30789860 0.4426178 -8.2555518 3.6189367 0 1
## RegionSouthern Asia 0.64359073 0.3119323 -1.6509401 6.2207866 0 1
## RegionWestern Africa 0.06128208 0.7290114 -38.6917511 6.2159535 0 1
## method
## (Intercept) 2
## Year 2
## RegionSouthern Africa 2
## RegionSouthern Asia 2
## RegionWestern Africa 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-23.67887 on 4 df, p=1, n=1930
## Wald test = 693.3726 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 126.34968980 227.1797625 -373.6503102 626.349690
## Year -0.06582648 0.1126214 -0.3142313 0.181512
## RegionSouthern Africa 1.66136824 1.0665348 -0.6940896 9.080411
## RegionSouthern Asia 2.80818174 0.7935580 1.6670722 10.185614
## RegionWestern Africa 1.60510674 1.4781118 -3.7621368 9.207465
## Chisq p method
## (Intercept) 0.0000000 1.000000e+00 2
## Year 0.0000000 1.000000e+00 2
## RegionSouthern Africa 2.1209410 1.452972e-01 2
## RegionSouthern Asia 27.8831621 1.288663e-07 2
## RegionWestern Africa 0.7488254 3.868484e-01 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=20.48004 on 4 df, p=0.0004014047, n=1930
## Wald test = 296.3746 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 130.00000000 115.59195956 -355.235652 630.0000000
## Year -0.06596543 0.05730267 -0.313986 0.1743997
## RegionSouthern Africa -0.89015265 0.53288501 -7.872837 0.9249096
## RegionSouthern Asia -1.27786915 0.44476257 -20.237517 -0.2057696
## RegionWestern Africa -0.40619838 0.68538874 -6.861860 1.7952887
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 36.971556 1.198651e-09 2
## RegionSouthern Asia 67.396484 2.220446e-16 2
## RegionWestern Africa 2.122844 1.451168e-01 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=19.57295 on 4 df, p=0.000606272, n=1930
## Wald test = 673.5065 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq
## (Intercept) -130.00000000 49.04076208 -630.0000000 370.0000000 0
## Year 0.06383803 0.02430516 -0.1842753 0.3114068 0
## RegionSouthern Africa -0.32326558 0.18069720 -53.5228447 5.5652948 0
## RegionSouthern Asia -0.26716240 0.13874266 -31.7520349 5.4974921 0
## RegionWestern Africa -0.19657118 0.28903769 -45.2972609 5.3490828 0
## p method
## (Intercept) 1 2
## Year 1 2
## RegionSouthern Africa 1 2
## RegionSouthern Asia 1 2
## RegionWestern Africa 1 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-526.3923 on 4 df, p=1, n=1930
## Wald test = 541.8715 on 4 df, p = 0
logistic_sig <- as_tibble(result, rownames="variable") %>% filter(p<0.05) %>% left_join(kbayes$locus_rank)
## Joining with `by = join_by(locus)`
logistic_sig %>% filter(p<0.05)
## # A tibble: 15 × 5
## variable coef p locus rank
## <chr> <chr> <chr> <chr> <int>
## 1 RegionSouthern Asia -2.84833149909426 0 KL2 1
## 2 RegionWestern Africa -1.29017996241314 0.00166658030282141 KL2 1
## 3 RegionWestern Africa -0.115089058476192 0.0444605650057359 KL102 2
## 4 RegionSouthern Africa 1.56173164966805 0 KL25 3
## 5 RegionSouthern Asia -0.757212766403901 0.0239186306965349 KL25 3
## 6 RegionSouthern Asia 1.54040152282165 0 KL15 4
## 7 RegionSouthern Asia 0.125505745865047 0.000103935505882391 KL62 5
## 8 RegionWestern Africa 0.395666546573237 0.0431738528553842 KL62 5
## 9 RegionWestern Africa 1.49477887691326 0.00910282464090573 KL17 8
## 10 RegionSouthern Asia 3.49603759035185 0 KL51 10
## 11 RegionSouthern Asia -1.34293504105872 0.0243339302006811 KL39 12
## 12 RegionSouthern Africa 2.95363495302061 0 KL149 13
## 13 RegionSouthern Asia 0.107243564111339 0.000299737092402941 KL48 20
## 14 RegionSouthern Africa 0.822814575488765 0 KL8 23
## 15 RegionSouthern Asia 0.615349436699138 0 KL81 26
p<-0.02
region_2pc <- kbayes$region_est %>% select(locus, subgroup, mean) %>%
pivot_wider(id_cols=locus, names_from=subgroup, values_from = mean) %>%
mutate(d1=abs(`Eastern Africa`-`Southern Africa`),
d2=abs(`Eastern Africa`-`Western Africa`),
d3=abs(`Eastern Africa`-`Southern Asia`),
d4=abs(`Southern Africa`-`Southern Asia`),
d5=abs(`Southern Africa`-`Western Africa`),
d6=abs(`Western Africa`-`Southern Asia`)) %>%
filter(d1>p | d2>p | d3>p | d4>p | d5>p | d6>p)
Fig S3 - overlapping regional densities - log2 scale
kbayes$region_post %>%
left_join(kbayes$locus_rank) %>%
filter(rank<=30) %>%
mutate(Region=fct_relevel(subgroup,c("Western Africa", "Southern Africa", "Eastern Africa", "Southern Asia"))) %>%
ggplot() +
geom_hline(yintercept=11) +
geom_hline(yintercept=21) +
geom_density_ridges(aes(x=log2(prob), y=locus, fill=Region),
scale=1, rel_min_height = 0.01, alpha=0.5, col=NA) +
scale_y_discrete(limits=rev(kbayes$locus_rank$locus[1:30])) +
scale_x_continuous(limits=c(-13,0)) +
labs(x="log2(prevalence) per region", y="", fill="Region") +
annotate(y=region_2pc$locus, label="*", x=-1, geom="text") +
annotate(y=logistic_sig$locus, label="^", x=-0.5, geom="text") +
scale_fill_manual(values=region_cols) +
theme_bw()
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.123
## Warning: Removed 2636 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).

ggsave("figures/FigS3_RegionalKBayesDist.pdf", width=6, height=9)
## Picking joint bandwidth of 0.123
## Warning: Removed 2636 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).
ggsave("figures/FigS3_RegionalKBayesDist.png", width=6, height=9)
## Picking joint bandwidth of 0.123
## Warning: Removed 2636 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).
O locus analysis
read posterior estimates for adjusted counts (main) and raw counts
(for comparison)
# read adjusted Bayesian estimates for O
obayes <- parseModelledEstimates(global_post ="outputs_core/O_Full_ALL_adj_28_posterior_global.csv.gz", region_post = "outputs_core/O_Full_ALL_adj_28_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
# read raw Bayesian estimates for O
obayes_raw <- parseModelledEstimates(global_post ="outputs_core/O_Full_ALL_raw_28_posterior_global.csv.gz", region_post = "outputs_core/O_Full_ALL_raw_28_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
global and regional prevalence
o_prev_global_dist <- locus_ridgesplot(posterior=obayes$global_post,
ranks=obayes$locus_rank,
lines_every=5, maxRank=15,
xlim=c(0,30), xbreaks=seq(0,30,5), title="a) Global estimates")
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.0901
## Warning: Removed 257 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

o_prev_region_heatmap <- region_heatmap(estimates=obayes$region_est, global=obayes$global_est,
ranks=obayes$locus_rank, maxRank=15, median=F, title="b) Point estimates")

# fig 4a
o_prev_global_dist + o_prev_region_heatmap
## Picking joint bandwidth of 0.0901
## Warning: Removed 257 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

compare global prevalence distributions using cluster-adjusted and
raw counts
# plot raw vs adjusted distributions, overlaid - for appendix s2.4b
o_dist_raw_adj <- comparative_locus_ridgesplot(posterior1=obayes$global_post,
posterior2=obayes_raw$global_post,
ranks=obayes$locus_rank, maxRank=10,
lines_every=5, xlim=c(0,30), xbreaks=seq(0,30,5))
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.103
## Warning: Removed 257 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

logistic regression of O vs year + region
oresult <- c()
for (o in obayes$locus_rank$locus[1:10]) {
d <- data_NNS_sites10 %>%
mutate(o=if_else(O_genotype==o, 1, 0)) %>%
select(o, Year, Region)
m <- summary(logistf(o~Year+Region, data=d))
oresult <- rbind(oresult, cbind(coef=m$coefficients, p=m$prob, locus=o))
}
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -7.915304840 47.1640372 -100.40042475 84.68212427
## Year 0.003453048 0.0233770 -0.04244445 0.04929231
## RegionSouthern Africa -0.005521896 0.1660590 -0.33526304 0.31653647
## RegionSouthern Asia -0.493627573 0.1396298 -0.77089234 -0.22290635
## RegionWestern Africa 0.302361411 0.2491794 -0.19872272 0.78219635
## Chisq p method
## (Intercept) 0.02812636 0.8668120895 2
## Year 0.02178871 0.8826505754 2
## RegionSouthern Africa 0.00110587 0.9734715390 2
## RegionSouthern Asia 12.98331748 0.0003142786 2
## RegionWestern Africa 1.42967689 0.2318171782 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=17.81873 on 4 df, p=0.001338928, n=1930
## Wald test = 411.9142 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 43.66106855 -630.0000000 370.0000000
## Year 0.06410768 0.02163974 -0.1840446 0.3117062
## RegionSouthern Africa -0.09593226 0.15526698 -4.1624178 2.5154891
## RegionSouthern Asia -0.29052407 0.12334809 -4.4830882 1.5833615
## RegionWestern Africa 0.04536010 0.24576571 -8.8036957 4.4799611
## Chisq p method
## (Intercept) 0.000000 1.0000000 2
## Year 0.000000 1.0000000 2
## RegionSouthern Africa 0.000000 1.0000000 2
## RegionSouthern Asia 0.000000 1.0000000 2
## RegionWestern Africa 1.165171 0.2803956 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-198.3195 on 4 df, p=1, n=1930
## Wald test = 214.3342 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 130.00000000 42.06475319 -370.0000000 630.0000000
## Year -0.06455622 0.02085008 -0.3125101 0.1830565
## RegionSouthern Africa -0.09161390 0.15240643 -5.9170481 1.9782353
## RegionSouthern Asia -0.23127866 0.11829966 -6.1353338 1.1683005
## RegionWestern Africa -0.21746002 0.24266285 -16.5117316 4.3234941
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 19.733654 8.901981e-06 2
## RegionSouthern Asia Inf 0.000000e+00 2
## RegionWestern Africa 7.759546 5.342938e-03 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-130.2013 on 4 df, p=1, n=1930
## Wald test = 83.12975 on 4 df, p = 0logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 45.40848091 64.25871853 -80.83585300 171.99995108
## Year -0.02380163 0.03185194 -0.08655544 0.03877092
## RegionSouthern Africa 0.85859872 0.23123433 0.39683133 1.30631592
## RegionSouthern Asia 1.73072584 0.16700314 1.40641665 2.06248351
## RegionWestern Africa -0.76890230 0.65339403 -2.35361206 0.32017357
## Chisq p method
## (Intercept) 0.4969499 0.4808433918 2
## Year 0.5557499 0.4559777730 2
## RegionSouthern Africa 12.7880943 0.0003488323 2
## RegionSouthern Asia Inf 0.0000000000 2
## RegionWestern Africa 1.7379098 0.1874038869 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=134.7828 on 4 df, p=0, n=1930
## Wald test = 697.314 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Asia exceeded. Try to increase the number of iterations by
## passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 56.86737146 -630.000000 370.0000000
## Year 0.06356031 0.02818327 -0.184581 0.3110891
## RegionSouthern Africa -0.36240248 0.21810706 -24.601079 3.2446957
## RegionSouthern Asia -0.03180749 0.15697475 -4.386919 3.3576880
## RegionWestern Africa 0.15907331 0.31044666 -15.940080 5.1151587
## Chisq p method
## (Intercept) 0.000000 1.00000000 2
## Year 0.000000 1.00000000 2
## RegionSouthern Africa 0.000000 1.00000000 2
## RegionSouthern Asia 0.000000 1.00000000 2
## RegionWestern Africa 3.959427 0.04660958 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-163.4521 on 4 df, p=1, n=1930
## Wald test = 764.3549 on 4 df, p = 0logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 89.41031753 122.38104962 -154.4304391 333.9806243
## Year -0.04619848 0.06066604 -0.1674539 0.0746582
## RegionSouthern Africa 0.13047017 0.49889656 -0.9492056 1.0502107
## RegionSouthern Asia 0.80257902 0.32102979 0.1628516 1.4368821
## RegionWestern Africa 1.04434411 0.52649863 -0.1271791 1.9920392
## Chisq p method
## (Intercept) 0.51663877 0.47227902 2
## Year 0.56138425 0.45370305 2
## RegionSouthern Africa 0.06645363 0.79657206 2
## RegionSouthern Asia 6.00001856 0.01430573 2
## RegionWestern Africa 3.13651422 0.07655726 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=8.283469 on 4 df, p=0.08172918, n=1930
## Wald test = 652.0744 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa exceeded. Try to increase the number of iterations by
## passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 130.0000000 97.68690313 -370.0000000 630.0000000
## Year -0.0660204 0.04842661 -0.3141732 0.1814545
## RegionSouthern Africa 1.6455483 0.26211084 0.7818739 4.1798852
## RegionSouthern Asia 0.2496183 0.27781972 -1.6554204 2.3170071
## RegionWestern Africa -0.3623285 0.70030246 -26.9647063 2.7204386
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 35.705987 2.294578e-09 2
## RegionSouthern Asia 0.000000 1.000000e+00 2
## RegionWestern Africa 1.851984 1.735528e-01 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=42.18109 on 4 df, p=1.530031e-08, n=1930
## Wald test = 771.5683 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia exceeded. Try to increase the number
## of iterations by passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 63.88852738 -630.0000000 370.0000000
## Year 0.06331655 0.03166235 -0.1848777 0.3107992
## RegionSouthern Africa -0.41351104 0.26628667 -47.3208673 3.5797213
## RegionSouthern Asia 0.54091770 0.16604418 -1.4644141 4.8213351
## RegionWestern Africa 0.33890169 0.34736667 -12.7171776 5.6005327
## Chisq p method
## (Intercept) 0.000000 1.000000000 2
## Year 0.000000 1.000000000 2
## RegionSouthern Africa 0.000000 1.000000000 2
## RegionSouthern Asia Inf 0.000000000 2
## RegionWestern Africa 6.844984 0.008889051 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-53.35984 on 4 df, p=1, n=1930
## Wald test = 826.8288 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Asia exceeded. Try to increase the number of iterations by
## passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 100.71414226 -630.0000000 370.0000000
## Year 0.06272069 0.04991153 -0.1855357 0.3101549
## RegionSouthern Africa 0.37827946 0.34942716 -4.1754673 7.5149182
## RegionSouthern Asia 0.69279079 0.26254376 -1.0930093 7.8427191
## RegionWestern Africa 0.80477518 0.47878684 -6.5785507 8.5314061
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 27.152921 1.879809e-07 2
## RegionSouthern Asia Inf 0.000000e+00 2
## RegionWestern Africa 9.053097 2.622502e-03 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-12.67631 on 4 df, p=1, n=1930
## Wald test = 770.6723 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia exceeded. Try to increase the number
## of iterations by passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 241.7152036 -630.0000000 370.0000000
## Year 0.06189060 0.1197875 -0.1862222 0.3094036
## RegionSouthern Africa -0.05452735 0.9079278 -13.2421907 5.8485309
## RegionSouthern Asia 0.12435699 0.6771676 -4.6149708 5.8730352
## RegionWestern Africa 1.36551196 0.8421475 -2.8098983 7.9488429
## Chisq p method
## (Intercept) 0.000000 1.00000000 2
## Year 0.000000 1.00000000 2
## RegionSouthern Africa 0.000000 1.00000000 2
## RegionSouthern Asia 2.109106 0.14642487 2
## RegionWestern Africa 5.213434 0.02241303 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-2.843686 on 4 df, p=1, n=1930
## Wald test = 335.8981 on 4 df, p = 0
logistic_sig_o <- as_tibble(oresult, rownames="variable") %>% filter(p<0.05) %>% left_join(obayes$locus_rank)
## Joining with `by = join_by(locus)`
p<-0.05
region_5pc_o <- obayes$region_est %>% select(locus, subgroup, mean) %>%
pivot_wider(id_cols=locus, names_from=subgroup, values_from = mean) %>%
mutate(d1=abs(`Eastern Africa`-`Southern Africa`),
d2=abs(`Eastern Africa`-`Western Africa`),
d3=abs(`Eastern Africa`-`Southern Asia`),
d4=abs(`Southern Africa`-`Southern Asia`),
d5=abs(`Southern Africa`-`Western Africa`),
d6=abs(`Western Africa`-`Southern Asia`)) %>%
filter(d1>p | d2>p | d3>p | d4>p | d5>p | d6>p)
Fig S5 - overlapping regional densities for O types - log2
scale
obayes$region_post %>%
left_join(obayes$locus_rank) %>%
filter(rank<=10) %>%
mutate(Region=fct_relevel(subgroup,c("Western Africa", "Southern Africa", "Eastern Africa", "Southern Asia"))) %>%
ggplot() +
geom_hline(yintercept=11) +
geom_hline(yintercept=21) +
geom_density_ridges(aes(x=log2(prob), y=locus, fill=Region),
scale=1, rel_min_height = 0.01, alpha=0.5, col=NA) +
scale_y_discrete(limits=rev(obayes$locus_rank$locus[1:10])) +
scale_x_continuous(limits=c(-13,0)) +
labs(x="log2(prevalence) per region", y="", fill="Region") +
annotate(y=region_5pc_o$locus, label="*", x=-1, geom="text") +
annotate(y=logistic_sig_o$locus, label="^", x=-0.5, geom="text") +
scale_fill_manual(values=region_cols) +
theme_bw()
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.0645
## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

ggsave("figures/FigS5_RegionalOBayesDist.pdf", width=6, height=5, device = cairo_pdf, family = "Arial Unicode MS")
## Picking joint bandwidth of 0.0645
## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
ggsave("figures/FigS5_RegionalOBayesDist.png", width=6, height=5)
## Picking joint bandwidth of 0.0645
## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
Appendix Fig S1.3 = crude annual K rate per region
K_crude_perRegion_perYear <- data_NNS_sites10 %>%
raw_adj_prop(grouping_vars = c("K_locus", "Region", "Year"), summarise_by = "Region", adj_vars = c("Cluster", "Region", "Year"))
## grouping_var: Region in adj_vars, removing from adj_vars
## grouping_var: Year in adj_vars, removing from adj_vars
## Grouping vars: K_locus, Region, Year
## Summarising by: Region
## Adj vars: Cluster
K_crude_perRegion_perYear <- K_crude_perRegion_perYear %>% group_by(Region, Year) %>%
summarise(n=sum(adj_count)) %>% left_join(K_crude_perRegion_perYear) %>%
mutate(adj_prop = adj_count/n) %>% select(-raw_sum, -adj_sum, -raw_prop, -raw_count)
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(Region, Year)`
# numbers for text
data_NNS_sites10 %>% filter(K_locus=="KL51") %>% group_by(Year, Region) %>% count()
## # A tibble: 7 × 3
## # Groups: Year, Region [7]
## Year Region n
## <dbl> <chr> <int>
## 1 2013 Southern Asia 1
## 2 2016 Eastern Africa 1
## 3 2018 Southern Asia 3
## 4 2019 Southern Asia 9
## 5 2020 Southern Asia 3
## 6 2021 Southern Asia 1
## 7 2023 Southern Asia 5
data_NNS_sites10 %>% filter(K_locus=="KL51") %>% group_by(Region, Site) %>% count()
## # A tibble: 7 × 3
## # Groups: Region, Site [7]
## Region Site n
## <chr> <dbl> <int>
## 1 Eastern Africa 33 1
## 2 Southern Asia 2 4
## 3 Southern Asia 8 3
## 4 Southern Asia 12 2
## 5 Southern Asia 37 9
## 6 Southern Asia 46 2
## 7 Southern Asia 47 2
data_NNS_sites10 %>% filter(K_locus=="KL51") %>% group_by(Region, Study) %>% count()
## # A tibble: 5 × 3
## # Groups: Region, Study [5]
## Region Study n
## <chr> <chr> <int>
## 1 Eastern Africa MLW 1
## 2 Southern Asia AKU 5
## 3 Southern Asia CHRF 9
## 4 Southern Asia DH 4
## 5 Southern Asia NeoOBS-India 4
data_NNS_sites10 %>% filter(K_locus=="KL51" & Year>=2018) %>% group_by(Region) %>% count()
## # A tibble: 1 × 2
## # Groups: Region [1]
## Region n
## <chr> <int>
## 1 Southern Asia 21
data_NNS_sites10 %>% filter( Year>=2018) %>% group_by(Region) %>% count()
## # A tibble: 4 × 2
## # Groups: Region [4]
## Region n
## <chr> <int>
## 1 Eastern Africa 464
## 2 Southern Africa 232
## 3 Southern Asia 397
## 4 Western Africa 26
data_NNS_sites10 %>% filter(K_locus=="KL149") %>% group_by(Year, Region) %>% count()
## # A tibble: 5 × 3
## # Groups: Year, Region [5]
## Year Region n
## <dbl> <chr> <int>
## 1 2018 Southern Asia 1
## 2 2019 Southern Africa 24
## 3 2019 Southern Asia 2
## 4 2020 Southern Africa 21
## 5 2021 Eastern Africa 1
data_NNS_sites10 %>% filter(K_locus=="KL149") %>% group_by(Region, Site) %>% count()
## # A tibble: 9 × 3
## # Groups: Region, Site [9]
## Region Site n
## <chr> <dbl> <int>
## 1 Eastern Africa 52 1
## 2 Southern Africa 27 17
## 3 Southern Africa 28 1
## 4 Southern Africa 29 3
## 5 Southern Africa 30 6
## 6 Southern Africa 31 1
## 7 Southern Africa 32 2
## 8 Southern Africa 42 15
## 9 Southern Asia 37 3
data_NNS_sites10 %>% filter(K_locus=="KL149") %>% group_by(Region, Study) %>% count()
## # A tibble: 4 × 3
## # Groups: Region, Study [4]
## Region Study n
## <chr> <chr> <int>
## 1 Eastern Africa MBIRA 1
## 2 Southern Africa BabyGERMS 30
## 3 Southern Africa GBS-COP 15
## 4 Southern Asia CHRF 3
data_NNS_sites10 %>% filter(K_locus=="KL149" & Year %in% c(2019, 2020)) %>% group_by(Region) %>% count()
## # A tibble: 2 × 2
## # Groups: Region [2]
## Region n
## <chr> <int>
## 1 Southern Africa 45
## 2 Southern Asia 2
data_NNS_sites10 %>% filter(Year %in% c(2019, 2020)) %>% group_by(Region) %>% count()
## # A tibble: 4 × 2
## # Groups: Region [4]
## Region n
## <chr> <int>
## 1 Eastern Africa 136
## 2 Southern Africa 193
## 3 Southern Asia 206
## 4 Western Africa 2
K_crude_perRegion_perYear %>%
left_join(kbayes$locus_rank %>% rename(K_locus=locus)) %>%
filter(rank <=30 | K_locus %in% c("KL116", "KL53", "KL81")) %>%
ggplot(aes(x=Year, fill=Region, y=adj_prop)) +
geom_col() +
theme_bw() +
theme(axis.text.x = element_text(angle=45, hjust=1, size=8),
axis.text.y=element_text(size=10)) +
facet_wrap(~rank+K_locus, scales="free_y") +
theme(legend.position="bottom") +
scale_fill_manual(values=region_cols)
## Joining with `by = join_by(K_locus)`

ggsave("figures/AppendixFigS1.3_crudeRegionAnnualRateK.pdf", width=7, height=9)
ggsave("figures/AppendixFigS1.3_crudeRegionAnnualRateK.png", width=7, height=9)
Appendix Fig S1.4 - simple est per year per O/region
O_crude_perRegion_perYear <- data_NNS_sites10 %>%
raw_adj_prop(grouping_vars = c("O_genotype", "Region", "Year"), summarise_by = "Region", adj_vars = c("Cluster", "Region", "Year"))
## grouping_var: Region in adj_vars, removing from adj_vars
## grouping_var: Year in adj_vars, removing from adj_vars
## Grouping vars: O_genotype, Region, Year
## Summarising by: Region
## Adj vars: Cluster
O_crude_perRegion_perYear <- O_crude_perRegion_perYear %>% group_by(Region, Year) %>%
summarise(n=sum(adj_count)) %>% left_join(O_crude_perRegion_perYear) %>%
mutate(adj_prop = adj_count/n) %>% select(-raw_sum, -adj_sum, -raw_prop, -raw_count)
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(Region, Year)`
O_crude_perRegion_perYear %>%
left_join(obayes$locus_rank %>% rename(O_genotype=locus)) %>%
#filter(rank <=12) %>%
ggplot(aes(x=Year, fill=Region, y=adj_prop)) +
geom_col() +
theme_bw() +
theme(axis.text.x = element_text(angle=45, hjust=1, size=10),
axis.text.y=element_text(size=10)) +
facet_wrap(~rank+O_genotype, scales="free_y", ncol=3) +
theme(legend.position="bottom") +
scale_fill_manual(values=region_cols) + labs(fill="")
## Joining with `by = join_by(O_genotype)`

ggsave("figures/AppendixFigS1.4_crudeRegionAnnualRateO.pdf", width=6, height=8, device = cairo_pdf, family = "Arial Unicode MS")
ggsave("figures/AppendixFigS1.4_crudeRegionAnnualRateO.png", width=6, height=8)
effect of clustering - modelled estimates
kbayes_global <- full_join(kbayes$global_est, kbayes_raw$global_est, by="locus", suffix=c(".adj", ".raw"))
bayes_global_K_raw_vs_adj <- kbayes_global %>%
ggplot(aes(x=mean.raw*100, y=mean.adj*100)) +
geom_point() + theme_bw() +
labs(x="Bayesian estimate (%), raw", y="Bayesian estimate (%), cluster-adjusted") +
geom_abline(slope=1, intercept=0) +
geom_text(data=kbayes_global[kbayes_global$locus %in% kbayes$locus_rank$locus[1:5],], aes(x=mean.raw*100, y=mean.adj*100, label=locus), hjust=1, nudge_x=-0.2, size=2.5)
obayes_global <- full_join(obayes$global_est, obayes_raw$global_est, by="locus", suffix=c(".adj", ".raw"))
bayes_global_O_raw_vs_adj <- obayes_global %>%
ggplot(aes(x=mean.raw*100, y=mean.adj*100)) +
geom_point() + theme_bw() +
labs(x="Bayesian estimate (%), raw", y="Bayesian estimate (%), cluster-adjusted") +
geom_abline(slope=1, intercept=0) +
geom_text(data=obayes_global[obayes_global$locus %in% obayes$locus_rank$locus[1:9],], aes(x=mean.raw*100, y=mean.adj*100, label=locus), hjust=1, nudge_x=-0.2, size=2.5)
Appendix Figure S2.4 - cluster vs raw, global K
( (bayes_global_K_raw_vs_adj + ggtitle("a) K locus (mean estimate)") ) + (bayes_global_O_raw_vs_adj + ggtitle("b) O type (mean estimate)")) + patchwork::plot_layout(axes="collect") ) / (k_dist_raw_adj + ggtitle ("c) K locus (posterior distribution)") + o_dist_raw_adj + ggtitle ("d) O type (posterior distribution)") + patchwork::plot_layout(guides="collect")) + patchwork::plot_layout(heights=c(1,2)) & theme(legend.position='bottom')
## Picking joint bandwidth of 0.0586
## Warning: Removed 58 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.103
## Warning: Removed 257 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

ggsave("figures/AppendixFigS2.4_clusterVsRaw.pdf", width=8, height=11, device = cairo_pdf, family = "Arial Unicode MS")
## Picking joint bandwidth of 0.0586
## Warning: Removed 58 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.103
## Warning: Removed 257 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
ggsave("figures/AppendixFigS2.4_clusterVsRaw.png", width=8, height=11)
## Picking joint bandwidth of 0.0586
## Warning: Removed 58 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.103
## Warning: Removed 257 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
Effect of cluster adjustment on crude proportion per site
K_crude_perSite <- data_NNS_sites10 %>%
raw_adj_prop(grouping_vars = c("K_locus", "Site"), summarise_by = "Site", adj_vars = c("Cluster", "Site")) %>%
mutate(raw_prop = raw_count/raw_sum) %>%
mutate(adj_prop = adj_count/adj_sum)
## grouping_var: Site in adj_vars, removing from adj_vars
## Grouping vars: K_locus, Site
## Summarising by: Site
## Adj vars: Cluster
O_crude_perSite <- data_NNS_sites10 %>%
raw_adj_prop(grouping_vars = c("O_genotype", "Site"), summarise_by = "Site", adj_vars = c("Cluster", "Site")) %>%
mutate(raw_prop = raw_count/raw_sum) %>%
mutate(adj_prop = adj_count/adj_sum)
## grouping_var: Site in adj_vars, removing from adj_vars
## Grouping vars: O_genotype, Site
## Summarising by: Site
## Adj vars: Cluster
# KL25
K_crude_raw_adj_scatter_KL25 <- K_crude_perSite %>%
filter(K_locus=="KL25") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# KL2
K_crude_raw_adj_scatter_KL2 <- K_crude_perSite %>%
filter(K_locus=="KL2") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# KL102
K_crude_raw_adj_scatter_KL102 <- K_crude_perSite %>%
filter(K_locus=="KL102") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# KL62
K_crude_raw_adj_scatter_KL62 <- K_crude_perSite %>%
filter(K_locus=="KL62") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# O1v1
O_crude_raw_adj_scatter_O1v1 <- O_crude_perSite %>%
filter(O_genotype=="O1⍺β,2⍺") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# O1v2
O_crude_raw_adj_scatter_O1v2 <- O_crude_perSite %>%
filter(O_genotype=="O1⍺β,2β") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# O2afg
O_crude_raw_adj_scatter_O2afg <- O_crude_perSite %>%
filter(O_genotype=="O2β") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# O4
O_crude_raw_adj_scatter_O4 <- O_crude_perSite %>%
filter(O_genotype=="O4") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
Appendix Fig S2.3 - Effect of cluster adjustment on crude
proportion
(K_crude_raw_adj_scatter_KL102 + ggtitle("a) KL102, per-site") + K_crude_raw_adj_scatter_KL2 + ggtitle("b) KL2, per-site")) / (K_crude_raw_adj_scatter_KL25 + ggtitle("c) KL25, per-site") + K_crude_raw_adj_scatter_KL62 + ggtitle("d) KL62, per-site")) / (O_crude_raw_adj_scatter_O1v1 + ggtitle("e) O1⍺β,2⍺, per-site") + O_crude_raw_adj_scatter_O1v2 + ggtitle("f) O1⍺β,2β, per-site")) / (O_crude_raw_adj_scatter_O2afg + ggtitle("g) O2β, per-site") + O_crude_raw_adj_scatter_O4 + ggtitle("h) O4, per-site"))

ggsave("figures/AppendixFigS2.3_EffectClusteringCrude.pdf", width=8, height=10, device = cairo_pdf, family = "Arial Unicode MS")
ggsave("figures/AppendixFigS2.3_EffectClusteringCrude.png", width=8, height=10)
numbers for text - K prevalence estimates
kbayes$global_est
## # A tibble: 90 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL2 0.0895 0.0898 0.0716 0.110 1
## 2 KL102 0.0872 0.0875 0.0696 0.107 2
## 3 KL25 0.0823 0.0825 0.0646 0.102 3
## 4 KL15 0.0576 0.0579 0.0433 0.0745 4
## 5 KL62 0.0451 0.0455 0.0323 0.0604 5
## 6 KL30 0.0341 0.0345 0.0230 0.0479 6
## 7 KL10 0.0266 0.0270 0.0169 0.0392 7
## 8 KL17 0.0255 0.0259 0.0161 0.0380 8
## 9 KL23 0.0242 0.0246 0.0151 0.0366 9
## 10 KL51 0.0229 0.0233 0.0144 0.0346 10
## # ℹ 80 more rows
kbayes$global_est %>% filter(rank<=5)
## # A tibble: 5 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL2 0.0895 0.0898 0.0716 0.110 1
## 2 KL102 0.0872 0.0875 0.0696 0.107 2
## 3 KL25 0.0823 0.0825 0.0646 0.102 3
## 4 KL15 0.0576 0.0579 0.0433 0.0745 4
## 5 KL62 0.0451 0.0455 0.0323 0.0604 5
kbayes$global_est %>% filter(median>0.02)
## # A tibble: 12 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL2 0.0895 0.0898 0.0716 0.110 1
## 2 KL102 0.0872 0.0875 0.0696 0.107 2
## 3 KL25 0.0823 0.0825 0.0646 0.102 3
## 4 KL15 0.0576 0.0579 0.0433 0.0745 4
## 5 KL62 0.0451 0.0455 0.0323 0.0604 5
## 6 KL30 0.0341 0.0345 0.0230 0.0479 6
## 7 KL10 0.0266 0.0270 0.0169 0.0392 7
## 8 KL17 0.0255 0.0259 0.0161 0.0380 8
## 9 KL23 0.0242 0.0246 0.0151 0.0366 9
## 10 KL51 0.0229 0.0233 0.0144 0.0346 10
## 11 KL112 0.0230 0.0233 0.0142 0.0347 11
## 12 KL39 0.0218 0.0222 0.0131 0.0335 12
kbayes$global_est %>% filter(median>0.01)
## # A tibble: 27 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL2 0.0895 0.0898 0.0716 0.110 1
## 2 KL102 0.0872 0.0875 0.0696 0.107 2
## 3 KL25 0.0823 0.0825 0.0646 0.102 3
## 4 KL15 0.0576 0.0579 0.0433 0.0745 4
## 5 KL62 0.0451 0.0455 0.0323 0.0604 5
## 6 KL30 0.0341 0.0345 0.0230 0.0479 6
## 7 KL10 0.0266 0.0270 0.0169 0.0392 7
## 8 KL17 0.0255 0.0259 0.0161 0.0380 8
## 9 KL23 0.0242 0.0246 0.0151 0.0366 9
## 10 KL51 0.0229 0.0233 0.0144 0.0346 10
## # ℹ 17 more rows
kbayes$global_est %>% filter(locus=="KL149")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL149 0.0193 0.0197 0.0113 0.0298 13
kbayes$region_est %>% filter(locus=="KL149")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL149 Eastern Africa 0.00414 0.00482 0.000794 0.0127
## 2 KL149 Southern Africa 0.0783 0.0802 0.0430 0.127
## 3 KL149 Southern Asia 0.00583 0.00697 0.00108 0.0191
## 4 KL149 Western Africa 0.00546 0.00840 0.000459 0.0334
kbayes$global_est %>% filter(locus=="KL15")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL15 0.0576 0.0579 0.0433 0.0745 4
kbayes$region_est %>% filter(locus=="KL15")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL15 Eastern Africa 0.0580 0.0587 0.0375 0.0843
## 2 KL15 Southern Africa 0.00943 0.0112 0.00186 0.0300
## 3 KL15 Southern Asia 0.0902 0.0913 0.0585 0.130
## 4 KL15 Western Africa 0.0325 0.0374 0.00728 0.0946
kbayes$global_est %>% filter(locus=="KL51")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL51 0.0229 0.0233 0.0144 0.0346 10
kbayes$region_est %>% filter(locus=="KL51")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL51 Eastern Africa 0.00363 0.00433 0.000635 0.0120
## 2 KL51 Southern Africa 0.00229 0.00346 0.000195 0.0135
## 3 KL51 Southern Asia 0.0682 0.0698 0.0415 0.106
## 4 KL51 Western Africa 0.00418 0.00680 0.000287 0.0279
kbayes$global_est %>% filter(locus=="KL81")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL81 0.0119 0.0123 0.00584 0.0209 26
kbayes$region_est %>% filter(locus=="KL81")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL81 Eastern Africa 0.00156 0.00217 0.000153 0.00742
## 2 KL81 Southern Africa 0.00152 0.00251 0.000104 0.0108
## 3 KL81 Southern Asia 0.0350 0.0362 0.0163 0.0630
## 4 KL81 Western Africa 0.00262 0.00480 0.000155 0.0222
kbayes$global_est %>% filter(locus=="KL28")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL28 0.0131 0.0135 0.00679 0.0223 22
kbayes$region_est %>% filter(locus=="KL28")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL28 Eastern Africa 0.0127 0.0135 0.00481 0.0269
## 2 KL28 Southern Africa 0.00616 0.00782 0.000906 0.0242
## 3 KL28 Southern Asia 0.00367 0.00463 0.000512 0.0145
## 4 KL28 Western Africa 0.0618 0.0669 0.0201 0.140
kbayes$global_est %>% filter(locus=="KL116")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL116 0.00579 0.00615 0.00199 0.0124 36
kbayes$region_est %>% filter(locus=="KL116")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL116 Eastern Africa 0.00159 0.00218 0.000158 0.00752
## 2 KL116 Southern Africa 0.00158 0.00261 0.000108 0.0110
## 3 KL116 Southern Asia 0.00212 0.00301 0.000183 0.0109
## 4 KL116 Western Africa 0.0511 0.0561 0.0135 0.129
kbayes$global_est %>% filter(locus=="KL53")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL53 0.00448 0.00487 0.00132 0.0106 48
kbayes$region_est %>% filter(locus=="KL53")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL53 Eastern Africa 0.00144 0.00201 0.000140 0.00727
## 2 KL53 Southern Africa 0.00137 0.00228 0.0000881 0.0101
## 3 KL53 Southern Asia 0.00192 0.00276 0.000154 0.0103
## 4 KL53 Western Africa 0.0352 0.0404 0.00747 0.100
numbers for text - O antigen
obayes$global_est
## # A tibble: 15 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 O1⍺β,2⍺ 0.267 0.267 0.236 0.299 1
## 2 O1⍺β,2β 0.218 0.218 0.191 0.248 2
## 3 O2β 0.158 0.158 0.135 0.184 3
## 4 O4 0.0905 0.0908 0.0720 0.111 4
## 5 O2⍺ 0.0611 0.0615 0.0458 0.0794 5
## 6 O3𝛾 0.0597 0.0601 0.0449 0.0775 6
## 7 O5 0.0535 0.0539 0.0394 0.0702 7
## 8 O13 0.0487 0.0490 0.0352 0.0647 8
## 9 O3⍺/O3β 0.0279 0.0283 0.0181 0.0405 9
## 10 O15 0.00329 0.00369 0.000744 0.00893 10
## 11 O12 0.00209 0.00248 0.000311 0.00683 11
## 12 O10 0.00206 0.00246 0.000282 0.00691 12
## 13 O14 0.000853 0.00123 0.0000318 0.00442 13
## 14 O2⍺𝛾 0.000844 0.00122 0.0000334 0.00439 14
## 15 O1⍺β,2⍺𝛾 0.000843 0.00121 0.0000312 0.00446 15
obayes$global_est %>% filter(rank<=5) %>% pull(median) %>% sum()
## [1] 0.7947118
obayes$global_est %>% filter(rank<=5) %>% pull(lower) %>% sum()
## [1] 0.6802535
obayes$global_est %>% filter(rank<=5) %>% pull(upper) %>% sum()
## [1] 0.9214287
obayes$global_est
## # A tibble: 15 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 O1⍺β,2⍺ 0.267 0.267 0.236 0.299 1
## 2 O1⍺β,2β 0.218 0.218 0.191 0.248 2
## 3 O2β 0.158 0.158 0.135 0.184 3
## 4 O4 0.0905 0.0908 0.0720 0.111 4
## 5 O2⍺ 0.0611 0.0615 0.0458 0.0794 5
## 6 O3𝛾 0.0597 0.0601 0.0449 0.0775 6
## 7 O5 0.0535 0.0539 0.0394 0.0702 7
## 8 O13 0.0487 0.0490 0.0352 0.0647 8
## 9 O3⍺/O3β 0.0279 0.0283 0.0181 0.0405 9
## 10 O15 0.00329 0.00369 0.000744 0.00893 10
## 11 O12 0.00209 0.00248 0.000311 0.00683 11
## 12 O10 0.00206 0.00246 0.000282 0.00691 12
## 13 O14 0.000853 0.00123 0.0000318 0.00442 13
## 14 O2⍺𝛾 0.000844 0.00122 0.0000334 0.00439 14
## 15 O1⍺β,2⍺𝛾 0.000843 0.00121 0.0000312 0.00446 15
numbers for text - O differences by region
# where is O4 found
data_NNS_sites10 %>% filter(O_genotype=="O4") %>% group_by(Region, Study, Site, ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 52 × 6
## # Groups: Region, Study, Site, ST, K_locus [52]
## Region Study Site ST K_locus n
## <chr> <chr> <dbl> <chr> <chr> <int>
## 1 Southern Asia CHRF 37 ST11 KL15 59
## 2 Eastern Africa BARNARDS 17 ST37 KL15 28
## 3 Eastern Africa MLW 33 ST340 KL15 18
## 4 Southern Asia NeoOBS-India 46 ST11 KL15 18
## 5 Southern Africa BabyGERMS 27 ST152 KL149 16
## 6 Southern Asia CHRF 37 ST502 KL15 15
## 7 Southern Africa BabyGERMS 28 ST405 KL151 9
## 8 Eastern Africa MLW 33 ST45 KL15 8
## 9 Southern Asia CHRF 38 ST11 KL15 7
## 10 Eastern Africa Kilifi 49 ST585 KL15 6
## # ℹ 42 more rows
data_NNS_sites10 %>% filter(O_genotype=="O4" & Region=="Southern Asia") %>% group_by(ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 16 × 3
## # Groups: ST, K_locus [16]
## ST K_locus n
## <chr> <chr> <int>
## 1 ST11 KL15 91
## 2 ST502 KL15 15
## 3 ST437 KL36 10
## 4 ST340 KL15 5
## 5 ST152 KL149 3
## 6 ST273 KL15 3
## 7 ST2258 KL131 2
## 8 ST37 KL15 2
## 9 ST231 KL144 1
## 10 ST273 KL141 1
## 11 ST3623 KL15 1
## 12 ST392 KL27 1
## 13 ST429 KL27 1
## 14 ST70 KL15 1
## 15 ST726 KL15 1
## 16 ST883-1LV KL15 1
data_NNS_sites10 %>% filter(O_genotype=="O4" & Region=="Southern Asia") %>% group_by(ST, K_locus, Site) %>% count() %>% arrange(-n)
## # A tibble: 27 × 4
## # Groups: ST, K_locus, Site [27]
## ST K_locus Site n
## <chr> <chr> <dbl> <int>
## 1 ST11 KL15 37 59
## 2 ST11 KL15 46 18
## 3 ST502 KL15 37 15
## 4 ST11 KL15 38 7
## 5 ST11 KL15 2 6
## 6 ST437 KL36 37 6
## 7 ST152 KL149 37 3
## 8 ST273 KL15 37 3
## 9 ST340 KL15 35 2
## 10 ST340 KL15 37 2
## # ℹ 17 more rows
data_NNS_sites10 %>% filter(O_genotype=="O2a.v1") %>% group_by(Region, Study, Site, ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 0 × 6
## # Groups: Region, Study, Site, ST, K_locus [0]
## # ℹ 6 variables: Region <chr>, Study <chr>, Site <dbl>, ST <chr>,
## # K_locus <chr>, n <int>
data_NNS_sites10 %>% filter(O_genotype=="O2a.v1" & Region=="Southern Asia") %>% group_by(ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 0 × 3
## # Groups: ST, K_locus [0]
## # ℹ 3 variables: ST <chr>, K_locus <chr>, n <int>
data_NNS_sites10 %>% filter(O_genotype=="O2a.v1" & Region=="Southern Asia") %>% group_by(ST, K_locus, Site) %>% count() %>% arrange(-n)
## # A tibble: 0 × 4
## # Groups: ST, K_locus, Site [0]
## # ℹ 4 variables: ST <chr>, K_locus <chr>, Site <dbl>, n <int>
data_NNS_sites10 %>% filter(O_genotype=="O5") %>% group_by(Region, Study, Site, ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 21 × 6
## # Groups: Region, Study, Site, ST, K_locus [21]
## Region Study Site ST K_locus n
## <chr> <chr> <dbl> <chr> <chr> <int>
## 1 Eastern Africa Kilifi 49 ST17 KL25 15
## 2 Southern Africa GBS-COP 42 ST17 KL25 9
## 3 Southern Africa BabyGERMS 30 ST17 KL25 7
## 4 Eastern Africa MLW 33 ST17 KL25 4
## 5 Southern Africa BabyGERMS 29 ST17 KL25 4
## 6 Eastern Africa Kilifi 49 ST336 KL25 3
## 7 Southern Africa BabyGERMS 28 ST17 KL25 3
## 8 Southern Africa NIMBIplus 59 ST17 KL25 3
## 9 Southern Asia AKU 8 ST17 KL25 3
## 10 Southern Africa BabyGERMS 27 ST17 KL25 2
## # ℹ 11 more rows
data_NNS_sites10 %>% filter(O_genotype=="O5" & Region=="Southern Africa") %>% group_by(ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 3 × 3
## # Groups: ST, K_locus [3]
## ST K_locus n
## <chr> <chr> <int>
## 1 ST17 KL25 32
## 2 ST2621 KL25 1
## 3 ST3184 KL25 1
data_NNS_sites10 %>% filter(O_genotype=="O5" & Region=="Southern Africa") %>% group_by(ST, K_locus, Site) %>% count() %>% arrange(-n)
## # A tibble: 10 × 4
## # Groups: ST, K_locus, Site [10]
## ST K_locus Site n
## <chr> <chr> <dbl> <int>
## 1 ST17 KL25 42 9
## 2 ST17 KL25 30 7
## 3 ST17 KL25 29 4
## 4 ST17 KL25 28 3
## 5 ST17 KL25 59 3
## 6 ST17 KL25 27 2
## 7 ST17 KL25 31 2
## 8 ST17 KL25 32 2
## 9 ST2621 KL25 30 1
## 10 ST3184 KL25 32 1
Global and regional coverage - top20 global - Fig3bi
kbayes_raw_coverage <- getCumulativeCoverageGlobalRegional(kbayes_raw, kbayes$locus_rank) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
coverage_globalTop20_globalRegional <- plotCoverage(kbayes_raw_coverage, kbayes$locus_rank, maxRank=20, cols=region_cols)

coverage_globalTop20_globalRegional

fatal - read posterior estimates for adjusted counts (main) and raw
counts (for comparison)
# read raw Bayesian estimates for K - using fatal samples from sites with at least 10 fatal
kbayes_fatal_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/K_Fatal_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/K_Fatal_min10_raw_28_posterior_subgroup.csv.gz")
## Rows: 576000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2304000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
kbayes_fatal_raw_min10perSite_coverage <- getCumulativeCoverage(kbayes$locus_rank, kbayes_fatal_raw_min10perSite$global_post) %>% mutate(type="min10")
ESBL - read posterior estimates for adjusted counts (main) and raw
counts (for comparison)
kbayes_esbl_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/K_ESBL_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/K_ESBL_min10_raw_28_posterior_subgroup.csv.gz")
## Rows: 960000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3840000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
kbayes_esbl_raw_min10perSite_coverage <- getCumulativeCoverage(kbayes$locus_rank, kbayes_esbl_raw_min10perSite$global_post) %>% mutate(type="min10")
CP - read posterior estimates for adjusted counts (main) and raw
counts (for comparison)
kbayes_cp_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/K_Carba_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/K_Carba_min10_raw_28_posterior_subgroup.csv.bz2")
## Rows: 2940000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 8820000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
kbayes_cp_raw_min10perSite_coverage <- getCumulativeCoverage(kbayes$locus_rank, kbayes_cp_raw_min10perSite$global_post) %>% mutate(type="min10")
combine coverage estimates for subgroups (fatal, ESBL, CP) -
Fig3ai
kbayes_raw_coverage_subgroups <- kbayes_esbl_raw_min10perSite_coverage %>% mutate(subgroup="ESBL") %>%
bind_rows(kbayes_cp_raw_min10perSite_coverage %>% mutate(subgroup="CP")) %>%
bind_rows(kbayes_fatal_raw_min10perSite_coverage %>% mutate(subgroup="Fatal")) %>%
bind_rows(kbayes_raw_coverage %>% filter(subgroup=="Global") %>% mutate(subgroup="All") ) %>%
mutate(subgroup=fct_relevel(subgroup,c("All", "Fatal", "ESBL", "CP")))
coverage_globalTop20_subgroups <- plotCoverage(kbayes_raw_coverage_subgroups, kbayes$locus_rank, maxRank=20, cols=group_cols)

coverage_globalTop20_subgroups

take top 8 per region - Fig3aii and Fig3bii
## extract cluster-adjusted coverage per region
K_rank_SA <- kbayes$region_est %>% ungroup %>% filter(subgroup=="Southern Africa") %>% arrange(-mean) %>% mutate(rank=row_number())
K_rank_WA <- kbayes$region_est %>% ungroup %>% filter(subgroup=="Western Africa") %>% arrange(-mean) %>% mutate(rank=row_number())
K_rank_EA <- kbayes$region_est %>% ungroup %>% filter(subgroup=="Eastern Africa") %>% arrange(-mean) %>% mutate(rank=row_number())
K_rank_SAs <- kbayes$region_est %>% ungroup %>% filter(subgroup=="Southern Asia") %>% arrange(-mean) %>% mutate(rank=row_number())
# get top-8 within each region - gives 20 loci
n<-8
topN <- bind_rows(K_rank_SA, K_rank_EA) %>% bind_rows(K_rank_WA) %>% bind_rows(K_rank_SAs) %>% filter(rank<=n) %>% pull(locus) %>% unique()
length(unique(topN))
## [1] 20
k_rank_region8 <- kbayes$locus_rank[kbayes$locus_rank$locus %in% topN,] %>%
mutate(rank=1:(length(unique(topN))))
# calculate global & regional coverage for these K - Fig 3bii
kbayes_raw_coverage_top8perRegion <- getCumulativeCoverageGlobalRegional(kbayes_raw, k_rank_region8) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
coverage_global_top8perRegion <- plotCoverage(kbayes_raw_coverage_top8perRegion, k_rank_region8, maxRank=nrow(k_rank_region8), cols=region_cols)

coverage_global_top8perRegion

# calculate subgroup coverage for these K - Fig 3aii
kbayes_fatal_raw_min10perSite_coverage_top8perRegion <- getCumulativeCoverage(k_rank_region8, kbayes_fatal_raw_min10perSite$global_post)
kbayes_esbl_raw_min10perSite_coverage_top8perRegion <- getCumulativeCoverage(k_rank_region8, kbayes_esbl_raw_min10perSite$global_post)
kbayes_cp_raw_min10perSite_coverage_top8perRegion <- getCumulativeCoverage(k_rank_region8, kbayes_cp_raw_min10perSite$global_post)
kbayes_raw_coverage_subgroups_top8perRegion <- kbayes_esbl_raw_min10perSite_coverage_top8perRegion %>% mutate(subgroup="ESBL") %>%
bind_rows(kbayes_cp_raw_min10perSite_coverage_top8perRegion %>% mutate(subgroup="CP")) %>%
bind_rows(kbayes_fatal_raw_min10perSite_coverage_top8perRegion %>% mutate(subgroup="Fatal")) %>%
bind_rows(kbayes_raw_coverage_top8perRegion %>% filter(subgroup=="Global") %>% mutate(subgroup="All") ) %>%
mutate(subgroup=fct_relevel(subgroup,c("All", "Fatal", "ESBL", "CP")))
coverage_subgroups_top8perRegion <- plotCoverage(kbayes_raw_coverage_subgroups_top8perRegion, k_rank_region8, maxRank=nrow(k_rank_region8), cols=group_cols)

coverage_subgroups_top8perRegion

(coverage_globalTop20_subgroups + ggtitle("a) Global coverage, by case type", subtitle="KL set 1: global top-20") + coverage_subgroups_top8perRegion + ggtitle("", subtitle="KL set 2: top-8 per region") + patchwork::plot_layout(guides="collect", axes="collect")) / (coverage_globalTop20_globalRegional + ggtitle("b) Per-region coverage", subtitle="KL set 1: global top-20") + coverage_global_top8perRegion + ggtitle("", subtitle="KL set 2: top-8 per region") + patchwork::plot_layout(guides="collect", axes="collect"))

ggsave("figures/Fig3_Kcoverage_raw.pdf", width=8, height=8)
ggsave("figures/Fig3_Kcoverage_raw.png", width=8, height=8)
Table 2 - coverage stats for all/ESBL/CP/fatal, for global and
regional
kbayes_fatal_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=kbayes$locus_rank, bayes=kbayes_fatal_raw_min10perSite, maxRank=20)
kbayes_esbl_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=kbayes$locus_rank, bayes=kbayes_esbl_raw_min10perSite, maxRank=20)
kbayes_cp_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=kbayes$locus_rank, bayes=kbayes_cp_raw_min10perSite, maxRank=20)
table2 <- kbayes_raw_coverage %>% mutate(cases="All") %>%
bind_rows(kbayes_fatal_raw_min10perSite_coverageRegional %>% mutate(cases=" Fatal infections")) %>%
bind_rows(kbayes_esbl_raw_min10perSite_coverageRegional %>% mutate(cases=" ESBL infections")) %>%
bind_rows(kbayes_cp_raw_min10perSite_coverageRegional %>% mutate(cases=" CP infections")) %>%
filter(rank %in% c(5,10,15,20)) %>%
mutate(upper=if_else(upper>1, 1, upper)) %>%
mutate(summary=paste0(round(mean,3)*100," (",round(lower,3)*100,"-",round(upper,3)*100,")")) %>%
select(rank, subgroup, cases, summary) %>%
pivot_wider(names_from=rank, values_from=summary, id_cols=c(subgroup, cases)) %>%
rename(Region=subgroup)
add totals per subgroup
fatal_counts <- data_NNS_sites10 %>% filter(Mortality==1) %>% group_by(Region) %>% count()
fatal_counts <- fatal_counts %>% bind_rows(list(Region="Global", n=sum(fatal_counts$n)))
esbl_counts <- data_NNS_sites10 %>% filter(resistance_score>=1) %>% group_by(Region) %>% count()
esbl_counts <- esbl_counts %>% bind_rows(list(Region="Global", n=sum(esbl_counts$n)))
cp_counts <- data_NNS_sites10 %>% filter(resistance_score>=2) %>% group_by(Region) %>% count()
cp_counts <- cp_counts %>% bind_rows(list(Region="Global", n=sum(cp_counts$n)))
total_counts <- data_NNS_sites10 %>% group_by(Region) %>% count()
total_counts <- total_counts %>% bind_rows(list(Region="Global", n=sum(total_counts$n)))
table2 <- esbl_counts %>% mutate(cases=" ESBL infections") %>%
bind_rows(cp_counts%>% mutate(cases=" CP infections")) %>%
bind_rows(fatal_counts %>% mutate(cases=" Fatal infections")) %>%
bind_rows(total_counts %>% mutate(cases="All")) %>%
full_join(table2, by=c("Region", "cases")) %>%
mutate(cases=fct_relevel(cases,c("All", " Fatal infections", " ESBL infections", " CP infections")))
# sort function isn't working when ordering region factor using fct_relevel
table2$Region <- factor(table2$Region, levels = c("Global", "Eastern Africa", "Southern Africa", "Western Africa", "Southern Asia"))
table2 <- table2 %>%
arrange(Region, cases) %>%
relocate(n, .after=cases) %>%
mutate(cases=if_else(cases=="All", Region, cases)) %>%
rename_with(~ paste0(.x, " KL"), c("5", "10", "15", "20")) %>%
rename(N=n) %>%
rename(Data=cases) %>%
ungroup %>% select(-Region)
write_tsv(table2, file="tables/Table2_coverage.tsv")
Table S4 - K set2 coverage stats for all/ESBL/CP/fatal, for global
and regional
kbayes_fatal_raw_min10perSite_coverageRegional_top8perRegion <- getCumulativeCoverageGlobalRegional(ranks=k_rank_region8, bayes=kbayes_fatal_raw_min10perSite, maxRank=20)
kbayes_esbl_raw_min10perSite_coverageRegional_top8perRegion <- getCumulativeCoverageGlobalRegional(ranks=k_rank_region8, bayes=kbayes_esbl_raw_min10perSite, maxRank=20)
kbayes_cp_raw_min10perSite_coverageRegional_top8perRegion <- getCumulativeCoverageGlobalRegional(ranks=k_rank_region8, bayes=kbayes_cp_raw_min10perSite, maxRank=20)
tableS4 <- kbayes_raw_coverage_top8perRegion %>% mutate(cases="All") %>%
bind_rows(kbayes_fatal_raw_min10perSite_coverageRegional_top8perRegion %>% mutate(cases=" Fatal infections")) %>%
bind_rows(kbayes_esbl_raw_min10perSite_coverageRegional_top8perRegion %>% mutate(cases=" ESBL infections")) %>%
bind_rows(kbayes_cp_raw_min10perSite_coverageRegional_top8perRegion %>% mutate(cases=" CP infections")) %>%
mutate(upper=if_else(upper>1, 1, upper)) %>%
filter(rank %in% c(5,10,15,20)) %>%
mutate(summary=paste0(round(mean,3)*100," (",round(lower,3)*100,"-",round(upper,3)*100,")")) %>%
select(rank, subgroup, cases, summary) %>%
pivot_wider(names_from=rank, values_from=summary, id_cols=c(subgroup, cases)) %>%
rename(Region=subgroup)
tableS4 <- esbl_counts %>% mutate(cases=" ESBL infections") %>%
bind_rows(cp_counts%>% mutate(cases=" CP infections")) %>%
bind_rows(fatal_counts %>% mutate(cases=" Fatal infections")) %>%
bind_rows(total_counts %>% mutate(cases="All")) %>%
full_join(tableS4, by=c("Region", "cases")) %>%
mutate(cases=fct_relevel(cases,c("All", " Fatal infections", " ESBL infections", " CP infections")))
# sort function isn't working when ordering region factor using fct_relevel
tableS4$Region <- factor(tableS4$Region, levels = c("Global", "Eastern Africa", "Southern Africa", "Western Africa", "Southern Asia"))
tableS4 <- tableS4 %>%
arrange(Region, cases) %>%
relocate(n, .after=cases) %>%
mutate(cases=if_else(cases=="All", Region, cases)) %>%
rename_with(~ paste0(.x, " KL"), c("5", "10", "15", "20")) %>%
rename(N=n) %>%
rename(Data=cases) %>%
ungroup %>% select(-Region)
write_tsv(tableS4, file="tables/TableS4_coverageSet2.tsv")
Fig S4 - coverage for region-focused K sets
# ranked by Southern Africa
kbayes_raw_coverage_top20SA <- getCumulativeCoverageGlobalRegional(kbayes_raw, K_rank_SA %>% select(locus, rank), maxRank=20) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
kbayes_raw_coverage_globalRegional_top20SA <- plotCoverage(kbayes_raw_coverage_top20SA, K_rank_SA %>% select(locus, rank), maxRank=20, cols=region_cols)

# ranked by Eastern Africa
kbayes_raw_coverage_top20EA <- getCumulativeCoverageGlobalRegional(kbayes_raw, K_rank_EA %>% select(locus, rank), maxRank=20) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
kbayes_raw_coverage_globalRegional_top20EA <- plotCoverage(kbayes_raw_coverage_top20EA, K_rank_EA %>% select(locus, rank), maxRank=20, cols=region_cols)

# ranked by Western Africa
kbayes_raw_coverage_top20WA <- getCumulativeCoverageGlobalRegional(kbayes_raw, K_rank_WA %>% select(locus, rank), maxRank=20) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
kbayes_raw_coverage_globalRegional_top20WA <- plotCoverage(kbayes_raw_coverage_top20WA, K_rank_WA %>% select(locus, rank), maxRank=20, cols=region_cols)

# ranked by Southern Asia
kbayes_raw_coverage_top20SAs <- getCumulativeCoverageGlobalRegional(kbayes_raw, K_rank_SAs %>% select(locus, rank), maxRank=20) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
kbayes_raw_coverage_globalRegional_top20SAs <- plotCoverage(kbayes_raw_coverage_top20SAs, K_rank_SAs %>% select(locus, rank), maxRank=20, cols=region_cols)

(kbayes_raw_coverage_globalRegional_top20SA + ggtitle("Top-20 K loci in S. Africa") + kbayes_raw_coverage_globalRegional_top20EA + ggtitle("Top-20 K loci in E. Africa")) / (kbayes_raw_coverage_globalRegional_top20WA + ggtitle("Top-20 K loci in W. Africa") + kbayes_raw_coverage_globalRegional_top20SAs + ggtitle("Top-20 K loci in S. Asia")) + patchwork::plot_layout(guides="collect")

ggsave("figures/FigS4_KcoverageRaw_top20PerRegion.pdf", width=8, height=8)
ggsave("figures/FigS4_KcoverageRaw_top20PerRegion.png", width=8, height=8)
numbers for text - K coverage
# coverage of top-20
kbayes_raw_coverage %>% filter(rank %in% c(3,5,10,15,20) & subgroup=="Global") %>% select(mean, lower, upper) %>% mutate(rank=c(3,5,10,15,20))
## # A tibble: 5 × 4
## mean lower upper rank
## <dbl> <dbl> <dbl> <dbl>
## 1 0.351 0.328 0.376 3
## 2 0.488 0.460 0.517 5
## 3 0.587 0.556 0.619 10
## 4 0.670 0.636 0.704 15
## 5 0.728 0.693 0.764 20
kbayes_raw_coverage %>% filter(rank==10 & subgroup=="Global") %>% select(mean, lower, upper) - (kbayes_raw_coverage %>% filter(rank==5 & subgroup=="Global") %>% select(mean, lower, upper))
## mean lower upper
## 1 0.09899569 0.09608937 0.1023605
kbayes_raw_coverage %>% filter(rank==15 & subgroup=="Global") %>% select(mean, lower, upper) - (kbayes_raw_coverage %>% filter(rank==10 & subgroup=="Global") %>% select(mean, lower, upper))
## mean lower upper
## 1 0.08240133 0.08031075 0.0843404
kbayes_raw_coverage %>% filter(rank==20 & subgroup=="Global") %>% select(mean, lower, upper) - (kbayes_raw_coverage %>% filter(rank==15 & subgroup=="Global") %>% select(mean, lower, upper))
## mean lower upper
## 1 0.05813391 0.05676994 0.05986134
# available data on fatal cases
data_NNS_sites10 %>% filter(!is.na(Mortality)) %>% nrow()
## [1] 1063
data_NNS_sites10 %>% filter(Mortality==1) %>% nrow()
## [1] 394
# coverage of region-focused subsets
kbayes_raw_coverage_globalRegional_top20SA$data %>% filter(rank==20)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.709 0.709 0.675 0.744 KL28 Global
## 2 20 0.735 0.735 0.690 0.783 KL28 Eastern Africa
## 3 20 0.569 0.568 0.509 0.633 KL28 Southern Asia
## 4 20 0.904 0.903 0.794 1 KL28 Southern Africa
## 5 20 0.601 0.596 0.451 0.774 KL28 Western Africa
kbayes_raw_coverage_globalRegional_top20EA$data %>% filter(rank==20)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.715 0.715 0.681 0.751 KL64 Global
## 2 20 0.810 0.810 0.762 0.861 KL64 Eastern Africa
## 3 20 0.576 0.575 0.516 0.642 KL64 Southern Asia
## 4 20 0.590 0.589 0.503 0.683 KL64 Southern Africa
## 5 20 0.622 0.618 0.470 0.796 KL64 Western Africa
kbayes_raw_coverage_globalRegional_top20WA$data %>% filter(rank==20)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.696 0.696 0.662 0.730 KL30 Global
## 2 20 0.786 0.786 0.739 0.836 KL30 Eastern Africa
## 3 20 0.532 0.531 0.474 0.593 KL30 Southern Asia
## 4 20 0.581 0.579 0.494 0.672 KL30 Southern Africa
## 5 20 0.798 0.794 0.620 0.995 KL30 Western Africa
kbayes_raw_coverage_globalRegional_top20SAs$data %>% filter(rank==20)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.690 0.690 0.656 0.725 KL54 Global
## 2 20 0.682 0.681 0.638 0.727 KL54 Eastern Africa
## 3 20 0.807 0.806 0.735 0.883 KL54 Southern Asia
## 4 20 0.566 0.565 0.481 0.657 KL54 Southern Africa
## 5 20 0.450 0.446 0.322 0.600 KL54 Western Africa
# coverage of KL set 2
kbayes_raw_coverage_top8perRegion%>% filter(rank==20)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.729 0.729 0.694 0.765 KL53 Global
## 2 20 0.715 0.715 0.670 0.763 KL53 Eastern Africa
## 3 20 0.719 0.718 0.651 0.791 KL53 Southern Asia
## 4 20 0.822 0.821 0.721 0.928 KL53 Southern Africa
## 5 20 0.698 0.695 0.533 0.882 KL53 Western Africa
kbayes_raw_coverage_subgroups_top8perRegion %>% filter(rank==20)
## # A tibble: 4 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.749 0.749 0.712 0.788 KL53 ESBL
## 2 20 0.815 0.815 0.741 0.891 KL53 CP
## 3 20 0.706 0.705 0.628 0.785 KL53 Fatal
## 4 20 0.729 0.729 0.694 0.765 KL53 All
# what is extra in set2 that is not in top-20?
k_rank_region8 %>% left_join(kbayes$locus, by="locus") %>% filter(rank.y>20)
## # A tibble: 5 × 3
## locus rank.x rank.y
## <chr> <int> <int>
## 1 KL28 16 22
## 2 KL8 17 23
## 3 KL81 18 26
## 4 KL116 19 36
## 5 KL53 20 48
Global and regional O coverage - top10 global - Fig4d
obayes_raw_coverage <- getCumulativeCoverageGlobalRegional(obayes_raw, obayes$locus_rank) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
ocoverage_globalTop10_globalRegional <- plotCoverage(obayes_raw_coverage, obayes$locus_rank, maxRank=10, xintercept=c(4,8), cols=region_cols, col_title = "d) Region")

ocoverage_globalTop10_globalRegional

O fatal - read posterior estimates for adjusted counts (main) and
raw counts (for comparison)
obayes_fatal_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/O_Fatal_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/O_Fatal_min10_raw_28_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 132000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 528000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
obayes_fatal_raw_min10perSite_coverage <- getCumulativeCoverage(obayes$locus_rank, obayes_fatal_raw_min10perSite$global_post) %>% mutate(type="min10")
ESBL - read posterior estimates for adjusted counts (main) and raw
counts (for comparison)
obayes_esbl_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/O_ESBL_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/O_ESBL_min10_raw_28_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 156000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 624000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
obayes_esbl_raw_min10perSite_coverage <- getCumulativeCoverage(obayes$locus_rank, obayes_esbl_raw_min10perSite$global_post) %>% mutate(type="min10")
CP - read posterior estimates for adjusted counts (main) and raw
counts (for comparison)
obayes_cp_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/O_Carba_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/O_Carba_min10_raw_28_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 120000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 360000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
obayes_cp_raw_min10perSite_coverage <- getCumulativeCoverage(obayes$locus_rank, obayes_cp_raw_min10perSite$global_post) %>% mutate(type="min10")
combine coverage estimates for subgroups (fatal, ESBL, CP) -
Fig4c
obayes_raw_coverage_subgroups <- obayes_esbl_raw_min10perSite_coverage %>% mutate(subgroup="ESBL") %>%
bind_rows(obayes_cp_raw_min10perSite_coverage %>% mutate(subgroup="CP")) %>%
bind_rows(obayes_fatal_raw_min10perSite_coverage %>% mutate(subgroup="Fatal")) %>%
bind_rows(obayes_raw_coverage %>% filter(subgroup=="Global") %>% mutate(subgroup="All") ) %>%
mutate(subgroup=fct_relevel(subgroup,c("All", "Fatal", "ESBL", "CP")))
o_coverage_globalTop10_subgroups <- plotCoverage(obayes_raw_coverage_subgroups, obayes$locus_rank, maxRank=10, xintercept=c(5,10), cols=group_cols, col_title = "c) Subgroup")

o_coverage_globalTop10_subgroups

(o_prev_global_dist + o_prev_region_heatmap) / (o_coverage_globalTop10_subgroups + ggtitle("c) Global coverage") + ocoverage_globalTop10_globalRegional + ggtitle("d) Regional coverage") + patchwork::plot_layout(guides="collect"))
## Picking joint bandwidth of 0.0901
## Warning: Removed 257 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

ggsave("figures/Fig4_O_prevAdj_coverageRaw.pdf", width=8, height=8, device = cairo_pdf, family = "Arial Unicode MS")
## Picking joint bandwidth of 0.0901
## Warning: Removed 257 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
ggsave("figures/Fig4_O_prevAdj_coverageRaw.png", width=8, height=8)
## Picking joint bandwidth of 0.0901
## Warning: Removed 257 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
numbers for text - O prevalence and coverage
# O prevalence
obayes$global_est
## # A tibble: 15 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 O1⍺β,2⍺ 0.267 0.267 0.236 0.299 1
## 2 O1⍺β,2β 0.218 0.218 0.191 0.248 2
## 3 O2β 0.158 0.158 0.135 0.184 3
## 4 O4 0.0905 0.0908 0.0720 0.111 4
## 5 O2⍺ 0.0611 0.0615 0.0458 0.0794 5
## 6 O3𝛾 0.0597 0.0601 0.0449 0.0775 6
## 7 O5 0.0535 0.0539 0.0394 0.0702 7
## 8 O13 0.0487 0.0490 0.0352 0.0647 8
## 9 O3⍺/O3β 0.0279 0.0283 0.0181 0.0405 9
## 10 O15 0.00329 0.00369 0.000744 0.00893 10
## 11 O12 0.00209 0.00248 0.000311 0.00683 11
## 12 O10 0.00206 0.00246 0.000282 0.00691 12
## 13 O14 0.000853 0.00123 0.0000318 0.00442 13
## 14 O2⍺𝛾 0.000844 0.00122 0.0000334 0.00439 14
## 15 O1⍺β,2⍺𝛾 0.000843 0.00121 0.0000312 0.00446 15
obayes$region_est
## # A tibble: 60 × 6
## # Groups: locus [15]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 O10 Eastern Africa 0.00133 0.00176 0.000144 0.00587
## 2 O10 Southern Africa 0.00131 0.00189 0.000121 0.00720
## 3 O10 Southern Asia 0.00310 0.00395 0.000379 0.0123
## 4 O10 Western Africa 0.00152 0.00229 0.000145 0.00886
## 5 O12 Eastern Africa 0.00179 0.00231 0.000218 0.00739
## 6 O12 Southern Africa 0.00135 0.00192 0.000129 0.00704
## 7 O12 Southern Asia 0.00239 0.00314 0.000295 0.0101
## 8 O12 Western Africa 0.00155 0.00235 0.000146 0.00910
## 9 O13 Eastern Africa 0.0461 0.0469 0.0289 0.0695
## 10 O13 Southern Africa 0.0158 0.0172 0.00436 0.0381
## # ℹ 50 more rows
obayes$region_est %>% filter(locus=="O2afg")
## # A tibble: 0 × 6
## # Groups: locus [0]
## # ℹ 6 variables: locus <chr>, subgroup <chr>, median <dbl>, mean <dbl>,
## # lower <dbl>, upper <dbl>
obayes$region_est %>% filter(locus=="O2a")
## # A tibble: 0 × 6
## # Groups: locus [0]
## # ℹ 6 variables: locus <chr>, subgroup <chr>, median <dbl>, mean <dbl>,
## # lower <dbl>, upper <dbl>
obayes$region_est %>% filter(locus=="O4")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 O4 Eastern Africa 0.0724 0.0732 0.0497 0.101
## 2 O4 Southern Africa 0.0705 0.0724 0.0398 0.115
## 3 O4 Southern Asia 0.137 0.138 0.0984 0.184
## 4 O4 Western Africa 0.0540 0.0574 0.0192 0.116
obayes$region_est %>% filter(locus=="O5")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 O5 Eastern Africa 0.0322 0.0330 0.0181 0.0523
## 2 O5 Southern Africa 0.119 0.121 0.0743 0.176
## 3 O5 Southern Asia 0.0482 0.0495 0.0272 0.0782
## 4 O5 Western Africa 0.0276 0.0305 0.00729 0.0704
obayes$region_est %>% filter(locus=="O3b")
## # A tibble: 0 × 6
## # Groups: locus [0]
## # ℹ 6 variables: locus <chr>, subgroup <chr>, median <dbl>, mean <dbl>,
## # lower <dbl>, upper <dbl>
obayes$region_est %>% filter(locus=="OL13")
## # A tibble: 0 × 6
## # Groups: locus [0]
## # ℹ 6 variables: locus <chr>, subgroup <chr>, median <dbl>, mean <dbl>,
## # lower <dbl>, upper <dbl>
# coverage
obayes_raw_coverage
## # A tibble: 150 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 1 0.259 0.259 0.240 0.278 O1⍺β,2⍺ Global
## 2 2 0.437 0.437 0.411 0.463 O1⍺β,2β Global
## 3 3 0.681 0.681 0.649 0.713 O2β Global
## 4 4 0.811 0.811 0.776 0.846 O4 Global
## 5 5 0.862 0.862 0.826 0.899 O2⍺ Global
## 6 6 0.890 0.890 0.853 0.927 O3𝛾 Global
## 7 7 0.925 0.925 0.888 0.964 O5 Global
## 8 8 0.973 0.972 0.934 1.01 O13 Global
## 9 9 0.991 0.991 0.951 1.03 O3⍺/O3β Global
## 10 10 0.992 0.992 0.952 1.03 O15 Global
## # ℹ 140 more rows
# STs with O4 or O2a in Southern Asia
data_NNS_sites10 %>% filter(Region=="Southern Asia" & O_genotype=="O4") %>% group_by(ST, K_locus)%>% count()
## # A tibble: 16 × 3
## # Groups: ST, K_locus [16]
## ST K_locus n
## <chr> <chr> <int>
## 1 ST11 KL15 91
## 2 ST152 KL149 3
## 3 ST2258 KL131 2
## 4 ST231 KL144 1
## 5 ST273 KL141 1
## 6 ST273 KL15 3
## 7 ST340 KL15 5
## 8 ST3623 KL15 1
## 9 ST37 KL15 2
## 10 ST392 KL27 1
## 11 ST429 KL27 1
## 12 ST437 KL36 10
## 13 ST502 KL15 15
## 14 ST70 KL15 1
## 15 ST726 KL15 1
## 16 ST883-1LV KL15 1
data_NNS_sites10 %>% filter(Region=="Southern Asia" & O_genotype=="O4") %>% group_by(ST, K_locus, Site)%>% count()
## # A tibble: 27 × 4
## # Groups: ST, K_locus, Site [27]
## ST K_locus Site n
## <chr> <chr> <dbl> <int>
## 1 ST11 KL15 2 6
## 2 ST11 KL15 35 1
## 3 ST11 KL15 37 59
## 4 ST11 KL15 38 7
## 5 ST11 KL15 46 18
## 6 ST152 KL149 37 3
## 7 ST2258 KL131 35 1
## 8 ST2258 KL131 38 1
## 9 ST231 KL144 8 1
## 10 ST273 KL141 37 1
## # ℹ 17 more rows
data_NNS_sites10 %>% filter(Region=="Southern Asia" & O_genotype=="O2a") %>% group_by(ST, K_locus)%>% count()
## # A tibble: 0 × 3
## # Groups: ST, K_locus [0]
## # ℹ 3 variables: ST <chr>, K_locus <chr>, n <int>
data_NNS_sites10 %>% filter(Region=="Southern Asia" & O_genotype=="O2a") %>% group_by(ST, K_locus, Site)%>% count()
## # A tibble: 0 × 4
## # Groups: ST, K_locus, Site [0]
## # ℹ 4 variables: ST <chr>, K_locus <chr>, Site <dbl>, n <int>
data_NNS_sites10 %>% filter(Region=="Southern Africa" & O_genotype=="O5") %>% group_by(ST, K_locus)%>% count()
## # A tibble: 3 × 3
## # Groups: ST, K_locus [3]
## ST K_locus n
## <chr> <chr> <int>
## 1 ST17 KL25 32
## 2 ST2621 KL25 1
## 3 ST3184 KL25 1
data_NNS_sites10 %>% filter(Region=="Southern Africa" & O_genotype=="O5") %>% group_by(ST, K_locus, Site, Country)%>% count()
## # A tibble: 10 × 5
## # Groups: ST, K_locus, Site, Country [10]
## ST K_locus Site Country n
## <chr> <chr> <dbl> <chr> <int>
## 1 ST17 KL25 27 South Africa 2
## 2 ST17 KL25 28 South Africa 3
## 3 ST17 KL25 29 South Africa 4
## 4 ST17 KL25 30 South Africa 7
## 5 ST17 KL25 31 South Africa 2
## 6 ST17 KL25 32 South Africa 2
## 7 ST17 KL25 42 South Africa 9
## 8 ST17 KL25 59 Botswana 3
## 9 ST2621 KL25 30 South Africa 1
## 10 ST3184 KL25 32 South Africa 1
# top-4 coverage
obayes_raw_coverage %>% filter(rank==4)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 4 0.811 0.811 0.776 0.846 O4 Global
## 2 4 0.869 0.869 0.823 0.917 O4 Eastern Africa
## 3 4 0.695 0.694 0.630 0.761 O4 Southern Asia
## 4 4 0.762 0.760 0.604 0.932 O4 Western Africa
## 5 4 0.793 0.792 0.696 0.896 O4 Southern Africa
obayes_raw_coverage_subgroups %>% filter(rank==4)
## # A tibble: 4 × 8
## rank mean median lower upper locus type subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <fct>
## 1 4 0.825 0.825 0.787 0.862 O4 min10 ESBL
## 2 4 0.698 0.698 0.633 0.764 O4 min10 CP
## 3 4 0.821 0.820 0.742 0.900 O4 min10 Fatal
## 4 4 0.811 0.811 0.776 0.846 O4 <NA> All
Table S5 - O coverage stats for all/ESBL/CP/fatal, for global and
regional
obayes_fatal_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=obayes$locus_rank, bayes=obayes_fatal_raw_min10perSite, maxRank=10)
obayes_esbl_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=obayes$locus_rank, bayes=obayes_esbl_raw_min10perSite, maxRank=10)
obayes_cp_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=obayes$locus_rank, bayes=obayes_cp_raw_min10perSite, maxRank=10)
tableS5 <- obayes_raw_coverage %>% filter(rank <= 10) %>% mutate(cases="All") %>%
bind_rows(obayes_fatal_raw_min10perSite_coverageRegional %>% mutate(cases=" Fatal infections")) %>%
bind_rows(obayes_esbl_raw_min10perSite_coverageRegional %>% mutate(cases=" ESBL infections")) %>%
bind_rows(obayes_cp_raw_min10perSite_coverageRegional %>% mutate(cases=" CP infections")) %>%
mutate(upper=if_else(upper>1, 1, upper)) %>%
mutate(summary=paste0(round(mean,3)*100," (",round(lower,3)*100,"-",round(upper,3)*100,")")) %>%
select(locus, subgroup, cases, summary) %>%
pivot_wider(names_from=locus, values_from=summary, id_cols=c(subgroup, cases)) %>%
rename(Region=subgroup)
add totals per subgroup
tableS5 <- esbl_counts %>% mutate(cases=" ESBL infections") %>%
bind_rows(cp_counts %>% mutate(cases=" CP infections")) %>%
bind_rows(fatal_counts %>% mutate(cases=" Fatal infections")) %>%
bind_rows(total_counts %>% mutate(cases="All")) %>%
full_join(tableS5, by=c("Region", "cases")) %>%
mutate(cases=fct_relevel(cases,c("All", " Fatal infections", " ESBL infections", " CP infections")))
# sort function isn't working when ordering region factor using fct_relevel
tableS5$Region <- factor(tableS5$Region, levels = c("Global", "Eastern Africa", "Southern Africa", "Western Africa", "Southern Asia"))
tableS5 <- tableS5 %>%
arrange(Region, cases) %>%
relocate(n, .after=cases) %>%
mutate(cases=if_else(cases=="All", Region, cases)) %>%
rename(N=n) %>%
rename(Data=cases) %>%
ungroup %>% select(-Region)
write_tsv(tableS5, file="tables/TableS5_coverage.tsv")
numbers for text - effect of cluster adjustment
kbayes_global %>% mutate(diff=mean.raw-mean.adj) %>% select(locus, mean.raw, mean.adj, diff, rank.raw, rank.adj) %>% arrange(-diff)
## # A tibble: 90 × 6
## locus mean.raw mean.adj diff rank.raw rank.adj
## <chr> <dbl> <dbl> <dbl> <int> <int>
## 1 KL102 0.177 0.0875 0.0897 1 2
## 2 KL15 0.101 0.0579 0.0431 3 4
## 3 KL2 0.120 0.0898 0.0299 2 1
## 4 KL104 0.0331 0.00859 0.0245 6 30
## 5 KL157 0.0150 0.00124 0.0138 16 73
## 6 KL81 0.0249 0.0123 0.0126 9 26
## 7 KL108 0.0212 0.00978 0.0114 11 29
## 8 KL149 0.0254 0.0197 0.00567 8 13
## 9 KL8 0.0161 0.0124 0.00371 14 23
## 10 KL127 0.00569 0.00369 0.00200 32 53
## # ℹ 80 more rows
data_NNS_sites10 %>% filter(K_locus=="KL102") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL102") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 71 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 SZ1 Zambia ST307 203 0.594 0.594
## 2 CHRF187 Bangladesh ST147 21 0.0614 0.655
## 3 SZ3 Zambia ST307 11 0.0322 0.687
## 4 Kilifi141 Kenya ST3247 8 0.0234 0.711
## 5 BG3 South Africa ST307 5 0.0146 0.725
## 6 CHRF120 Bangladesh ST307 5 0.0146 0.740
## 7 MB33 Ghana ST307 5 0.0146 0.754
## 8 BG33 South Africa ST307 4 0.0117 0.766
## 9 ML94 Malawi ST307 4 0.0117 0.778
## 10 SZ30 Zambia ST307 4 0.0117 0.789
## # ℹ 61 more rows
data_NNS_sites10 %>% filter(K_locus=="KL2") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL2") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 73 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 ML31 Malawi ST39 87 0.377 0.377
## 2 ML38 Malawi ST14 23 0.0996 0.476
## 3 Kiambu sub-County Hospital30 Kenya ST14 12 0.0519 0.528
## 4 Mbagathi54 Kenya ST14 11 0.0476 0.576
## 5 ML24 Malawi ST14 8 0.0346 0.610
## 6 Kilifi86 Kenya ST14 7 0.0303 0.641
## 7 ML32 Malawi ST25 4 0.0173 0.658
## 8 ML57 Malawi ST25 4 0.0173 0.675
## 9 Kilifi125 Kenya ST101 3 0.0130 0.688
## 10 MB89 Malawi ST25 2 0.00866 0.697
## # ℹ 63 more rows
data_NNS_sites10 %>% filter(K_locus=="KL15") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL15") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 45 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 CHRF149 Bangladesh ST11 66 0.338 0.338
## 2 BA24 Ethiopia ST37 26 0.133 0.472
## 3 CHRF169 Bangladesh ST502 14 0.0718 0.544
## 4 NEO2 India ST11 14 0.0718 0.615
## 5 ML12 Malawi ST340 7 0.0359 0.651
## 6 ML123 Malawi ST45 7 0.0359 0.687
## 7 ML16 Malawi ST340 6 0.0308 0.718
## 8 aiims10 India ST11 6 0.0308 0.749
## 9 MB51 Zambia ST5856 4 0.0205 0.769
## 10 BA20 Ethiopia ST37 3 0.0154 0.785
## # ℹ 35 more rows
data_NNS_sites10 %>% filter(K_locus=="KL104") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL104") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 8 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 MB102 Tanzania ST1741 50 0.781 0.781
## 2 Kilifi117 Kenya ST1741 5 0.0781 0.859
## 3 aiims21 India ST1741 3 0.0469 0.906
## 4 aiims13 India ST1741 2 0.0312 0.938
## 5 MB101 Tanzania ST1741 1 0.0156 0.953
## 6 MB102 Tanzania ST1741-1LV 1 0.0156 0.969
## 7 MB99 Tanzania ST1741 1 0.0156 0.984
## 8 ML75 Malawi ST245-2LV 1 0.0156 1
data_NNS_sites10 %>% filter(K_locus=="KL157") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL157") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 1 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 Kiambu sub-County Hospital42 Kenya ST6775 29 1 1
data_NNS_sites10 %>% filter(K_locus=="KL81") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL81") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 9 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 CHRF182 Bangladesh ST16 28 0.583 0.583
## 2 AKU15 Pakistan ST16 7 0.146 0.729
## 3 CHRF126 Bangladesh ST16 6 0.125 0.854
## 4 AKU69 Pakistan ST16 2 0.0417 0.896
## 5 AKU39 Pakistan ST16 1 0.0208 0.917
## 6 CHRF165 Bangladesh ST16 1 0.0208 0.938
## 7 CHRF167 Bangladesh ST16 1 0.0208 0.958
## 8 CHRF176 Bangladesh ST16 1 0.0208 0.979
## 9 CHRF178 Bangladesh ST16 1 0.0208 1
data_NNS_sites10 %>% filter(K_locus=="KL108") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL108") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 8 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 BA23 Ethiopia ST35 32 0.780 0.780
## 2 BA30 Ethiopia ST35 3 0.0732 0.854
## 3 BA25 Ethiopia ST35 1 0.0244 0.878
## 4 BA26 Ethiopia ST35 1 0.0244 0.902
## 5 BA32 Ethiopia ST35 1 0.0244 0.927
## 6 BA73 Nigeria ST395 1 0.0244 0.951
## 7 ML173 Malawi ST110 1 0.0244 0.976
## 8 WI35 South Africa ST35 1 0.0244 1
data_NNS_sites10 %>% filter(K_locus=="KL149") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL149") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 16 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 BG27 South Africa ST152 16 0.327 0.327
## 2 WI20 South Africa ST39 10 0.204 0.531
## 3 BG77 South Africa ST39 5 0.102 0.633
## 4 CHRF139 Bangladesh ST152 3 0.0612 0.694
## 5 BG16 South Africa ST152 2 0.0408 0.735
## 6 BG37 South Africa ST152 2 0.0408 0.776
## 7 WI43 South Africa ST39 2 0.0408 0.816
## 8 BG40 South Africa ST39 1 0.0204 0.837
## 9 BG72 South Africa ST39 1 0.0204 0.857
## 10 BG84 South Africa ST152 1 0.0204 0.878
## 11 BG93 South Africa ST39 1 0.0204 0.898
## 12 BG96 South Africa ST39 1 0.0204 0.918
## 13 MB73 Ethiopia ST152 1 0.0204 0.939
## 14 WI15 South Africa ST152 1 0.0204 0.959
## 15 WI17 South Africa ST39 1 0.0204 0.980
## 16 WI36 South Africa ST152 1 0.0204 1
# how do these compare to those added to KL set 2 to get good region coverage? - only KL81
k_rank_region8 %>% left_join(kbayes$locus, by="locus") %>% filter(rank.y>20)
## # A tibble: 5 × 3
## locus rank.x rank.y
## <chr> <int> <int>
## 1 KL28 16 22
## 2 KL8 17 23
## 3 KL81 18 26
## 4 KL116 19 36
## 5 KL53 20 48
Leave One out (LOO)
Sensitivity analysis - O type, Bayesian estimate, adjusted
loo <- obayes$global_est %>% mutate(Study="None")
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_AKU_global_estimates.csv") %>%
mutate(Study="AKU") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_BabyGERMS_global_estimates.csv") %>%
mutate(Study="BabyGERMS") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_BARNARDS_global_estimates.csv") %>%
mutate(Study="BARNARDS") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 14 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_CHRF_global_estimates.csv") %>%
mutate(Study="CHRF") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 14 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_AIIMS_global_estimates.csv") %>%
mutate(Study="DH") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_GBS_global_estimates.csv") %>%
mutate(Study="GBS-COP") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_Kilifi_global_estimates.csv") %>%
mutate(Study="Kilifi") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 14 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_MBIRA_global_estimates.csv") %>%
mutate(Study="MBIRA") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_MLW_global_estimates.csv") %>%
mutate(Study="MLW") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 14 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_Botswana_global_estimates.csv") %>%
mutate(Study="NIMBIplus") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_NeoBAC_global_estimates.csv") %>%
mutate(Study="NeoBAC") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_NeoOBS_global_estimates.csv") %>%
mutate(Study="NeoOBS-India") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo_O <- read_csv("outputs_LOO/O_Full_ALL_adj_28_LOO_SPINZ_global_estimates.csv") %>%
mutate(Study="SPINZ") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo) %>%
mutate(locus=replace(locus, locus=="O1.v1", "O1⍺β,2⍺")) %>%
mutate(locus=replace(locus, locus=="O1.v2", "O1⍺β,2β")) %>%
mutate(locus=replace(locus, locus=="O1.v3", "O1⍺β,2⍺𝛾")) %>%
mutate(locus=replace(locus, locus=="O2a.v1", "O2⍺")) %>%
mutate(locus=replace(locus, locus=="O2a.v3", "O2⍺𝛾")) %>%
mutate(locus=replace(locus, locus=="O2afg.v2", "O2β")) %>%
mutate(locus=replace(locus, locus=="O3/O3a", "O3⍺/O3β")) %>%
mutate(locus=replace(locus, locus=="O3b", "O3𝛾")) %>%
mutate(locus=replace(locus, locus=="OL13", "O13")) %>%
mutate(locus=replace(locus, locus=="OL102", "O14")) %>%
mutate(locus=replace(locus, locus=="OL103", "O10")) %>%
mutate(locus=replace(locus, locus=="OL104", "O15"))
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Bayes global sensitivity analysis, top20 in any, print rank - O
type
loo_O_plot <- loo_O %>%
filter(rank<=9) %>%
ggplot(aes(y=locus, x=Study, fill=factor(rank))) +
geom_tile() +
geom_text(aes(y=locus, x=Study, label=round(rank)), size=4) +
scale_y_discrete(limits=rev(obayes$locus_rank$locus[1:9])) +
scale_x_discrete(limits=rev(unique(loo_O$Study))) + # keep the order as per tibble, starting with 'None'
#scale_fill_gradient("Global\nprevalence (%)", low = "white", high = "#081d58")
scale_fill_manual(values = c(rev(brewer.pal(5, "Reds")), rev(brewer.pal(6, "Blues")[-1]))) +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10,
colour = "black"),
axis.title = element_text(size = 10, colour = "black"),
axis.text.y = element_text(size = 10, colour = "black"),
plot.title = element_text(hjust=0.5),
legend.position = "right") +
labs(y="", x="Excluded study", title="Sensitivity analysis - O types", fill="Rank")
Sensitivity analysis - K locus, Bayesian estimate, adjusted
loo <- kbayes$global_est %>% mutate(Study="None")
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_AKU_global_estimates.csv") %>%
mutate(Study="AKU") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 87 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_BabyGERMS_global_estimates.csv") %>%
mutate(Study="BabyGERMS") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 88 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_BARNARDS_global_estimates.csv") %>%
mutate(Study="BARNARDS") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 84 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_CHRF_global_estimates.csv") %>%
mutate(Study="CHRF") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 83 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_AIIMS_global_estimates.csv") %>%
mutate(Study="DH") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 89 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_GBS_global_estimates.csv") %>%
mutate(Study="GBS-COP") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 90 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_Kilifi_global_estimates.csv") %>%
mutate(Study="Kilifi") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 85 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_MBIRA_global_estimates.csv") %>%
mutate(Study="MBIRA") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 89 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_MLW_global_estimates.csv") %>%
mutate(Study="MLW") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 86 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_Botswana_global_estimates.csv") %>%
mutate(Study="NIMBIplus") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 90 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_NeoBAC_global_estimates.csv") %>%
mutate(Study="NeoBAC") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 89 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_NeoOBS_global_estimates.csv") %>%
mutate(Study="NeoOBS-India") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 90 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_ALL_adj_28_LOO_SPINZ_global_estimates.csv") %>%
mutate(Study="SPINZ") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 90 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sensitivity analysis for K, top20 in any, print rank
top20_sens <- loo %>% filter(rank<=20) %>% pull(locus) %>% unique()
loo_k_ranks <- loo %>%
filter(rank<=20) %>%
ggplot(aes(y=locus, x=Study, fill=factor(rank))) +
geom_tile() +
geom_text(aes(y=locus, x=Study, label=round(rank)), size=4) +
scale_y_discrete(limits=rev(kbayes$locus_rank$locus[kbayes$locus_rank$locus %in% top20_sens])) +
scale_x_discrete(limits=rev(unique(loo$Study))) + # keep the order as per tibble, starting with 'None'
#scale_fill_gradient("Global\nprevalence (%)", low = "white", high = "#081d58")
scale_fill_manual(values = c(rev(brewer.pal(5, "Reds")), rev(brewer.pal(6, "Purples")[-1]), rev(brewer.pal(6, "Blues")[-1]), rev(brewer.pal(5, "Greens")))) +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10,
colour = "black"),
axis.title = element_text(size = 10, colour = "black"),
axis.text.y = element_text(size = 10, colour = "black"),
plot.title = element_text(hjust=0.5),
legend.position = "right") +
labs(y="", x="Excluded study", title="Sensitivity analysis", fill="Rank") +
geom_hline(yintercept=length(top20_sens)+0.5-5) +
geom_hline(yintercept=length(top20_sens)+0.5-10) +
geom_hline(yintercept=length(top20_sens)+0.5-15) +
geom_hline(yintercept=length(top20_sens)+0.5-20)
LOO - K coverage
studies <- unique(data_NNS_sites10$Study)
studies[c(2,7,8,12)]
## [1] "NIMBIplus" "GBS-COP" "NeoOBS-India" "DH"
studies <- studies[-c(2,7,8,12)]
studies <- c(studies, c("AIIMS", "NeoOBS", "Botswana","GBS")) # filenames
loo_k_coverage <- tibble(
rank = integer(0),
mean = double(0),
median = double(0),
lower = double(0),
upper = double(0),
locus = character(0),
subgroup = character(0),
Study = character(0)
)
for (study in studies) {
# read raw Bayesian estimates for K
loo_kbayes_raw <- parseModelledEstimates(global_post=paste0("outputs_LOO/K_Full_ALL_raw_28_LOO_",study,"_posterior_global.csv.gz"), region_post=paste0("outputs_LOO/K_Full_ALL_raw_28_LOO_",study,"_posterior_subgroup.csv.gz"))
this_coverage <- getCumulativeCoverageGlobalRegional(loo_kbayes_raw, kbayes$locus_rank) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa"))) %>%
mutate(Study=study)
loo_k_coverage <- bind_rows(loo_k_coverage, this_coverage)
}
## Rows: 1056000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4224000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1032000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4128000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 996000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3984000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1008000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4032000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1020000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1068000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4272000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1044000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4176000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1068000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4272000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1068000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4272000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
loo_k_coverage_plot <- loo_k_coverage %>%
mutate(subgroup=fct_relevel(subgroup,rev(c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))) %>%
bind_rows(kbayes_raw_coverage %>% mutate(Study="None")) %>%
filter(rank==20) %>%
mutate(Study=if_else(Study=="NeoOBS", "NeoOBS-India", Study)) %>%
mutate(Study=if_else(Study=="AIIMS", "DH", Study)) %>%
mutate(Study=if_else(Study=="GBS", "GBS-COP", Study)) %>%
mutate(Study=if_else(Study=="Botswana", "NIMBIplus", Study)) %>%
ggplot(aes(x=Study, y=subgroup, fill=mean)) +
geom_tile() +
geom_text(aes(y=subgroup, x=Study, label=round(100*mean, 2)), size=3) +
scale_x_discrete(limits=rev(unique(loo$Study))) +
scale_fill_gradient(low="#C6DBEF", high="#6BAED6") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10,
colour = "black"),
axis.title = element_text(size = 10, colour = "black"),
axis.text.y = element_text(size = 10, colour = "black"),
plot.title = element_text(hjust=0.5),
legend.position = "right") +
labs(y="", x="Excluded study", fill="Coverage")
Appendix Fig S2.1 - LOO K
loo_k_ranks / loo_k_coverage_plot + patchwork::plot_layout(heights=c(4,1), axes="collect")

ggsave(file="figures/AppendixFigS2.1_LOO_BayesGlobal_Klocus.pdf", width=9, height=11)
ggsave(file="figures/AppendixFigS2.1_LOO_BayesGlobal_Klocus.png", width=9, height=11)
loo_o_coverage <- tibble(
rank = integer(0),
mean = double(0),
median = double(0),
lower = double(0),
upper = double(0),
locus = character(0),
subgroup = character(0),
Study = character(0)
)
for (study in studies) {
# read raw Bayesian estimates for K
loo_obayes_raw <- parseModelledEstimates(global_post=paste0("outputs_LOO/O_Full_ALL_raw_28_LOO_",study,"_posterior_global.csv.gz"), region_post=paste0("outputs_LOO/O_Full_ALL_raw_28_LOO_",study,"_posterior_subgroup.csv.gz"), fixOnames = T)
this_coverage <- getCumulativeCoverageGlobalRegional(loo_obayes_raw, obayes$locus_rank) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa"))) %>%
mutate(Study=study)
loo_o_coverage <- bind_rows(loo_o_coverage, this_coverage)
}
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 168000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 672000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 168000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 672000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 168000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 672000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 168000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 672000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
loo_o_coverage_plot4 <- loo_o_coverage %>%
mutate(subgroup=fct_relevel(subgroup,rev(c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))) %>%
bind_rows(obayes_raw_coverage %>% mutate(Study="None")) %>%
filter(rank==4) %>%
mutate(Study=if_else(Study=="NeoOBS", "NeoOBS-India", Study)) %>%
mutate(Study=if_else(Study=="AIIMS", "DH", Study)) %>%
mutate(Study=if_else(Study=="GBS", "GBS-COP", Study)) %>%
mutate(Study=if_else(Study=="Botswana", "NIMBIplus", Study)) %>%
ggplot(aes(x=Study, y=subgroup, fill=mean)) +
geom_tile() +
geom_text(aes(y=subgroup, x=Study, label=round(100*mean, 2)), size=3) +
scale_x_discrete(limits=rev(unique(loo$Study))) +
scale_fill_gradient(low="#C6DBEF", high="#6BAED6") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10,
colour = "black"),
axis.title = element_text(size = 10, colour = "black"),
axis.text.y = element_text(size = 10, colour = "black"),
plot.title = element_text(hjust=0.5),
legend.position = "right") +
labs(y="", x="Excluded study", fill="Coverage")
loo_o_coverage_plot5 <- loo_o_coverage %>%
mutate(subgroup=fct_relevel(subgroup,rev(c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))) %>%
bind_rows(obayes_raw_coverage %>% mutate(Study="None")) %>%
filter(rank==5) %>%
mutate(Study=if_else(Study=="NeoOBS", "NeoOBS-India", Study)) %>%
mutate(Study=if_else(Study=="AIIMS", "DH", Study)) %>%
mutate(Study=if_else(Study=="GBS", "GBS-COP", Study)) %>%
mutate(Study=if_else(Study=="Botswana", "NIMBIplus", Study)) %>%
ggplot(aes(x=Study, y=subgroup, fill=mean)) +
geom_tile() +
geom_text(aes(y=subgroup, x=Study, label=round(100*mean, 2)), size=3) +
scale_x_discrete(limits=rev(unique(loo$Study))) +
scale_fill_gradient(low="#C6DBEF", high="#6BAED6") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10,
colour = "black"),
axis.title = element_text(size = 10, colour = "black"),
axis.text.y = element_text(size = 10, colour = "black"),
plot.title = element_text(hjust=0.5),
legend.position = "right") +
labs(y="", x="Excluded study", fill="Coverage")
Appendix Fig S2.2
loo_O_plot / (loo_o_coverage_plot4 + ggtitle(label="Coverage of top-4")) / (loo_o_coverage_plot5 + ggtitle(label="Coverage of top-5")) + plot_layout(heights=c(2,1,1), axes="collect")

ggsave(file="figures/AppendixFigS2.2_LOO_BayesGlobal_Olocus.pdf", width=9, height=11, device = cairo_pdf, family = "Arial Unicode MS")
ggsave(file="figures/AppendixFigS2.2_LOO_BayesGlobal_Olocus.png", width=9, height=11)
EFFECT OF TEMPORAL THRESHOLD FOR CLUSTERING ON MODELLED
ESTIMATES
compare global prevalence distributions using counts
cluster-adjusted using 28 vs 365-day threshold
# read Bayesian estimates for K adjusted using 365 days
kbayes_365 <- parseModelledEstimates(global_post="outputs_core/K_Full_ALL_adj_365_posterior_global.csv.gz", region_post="outputs_core/K_Full_ALL_adj_28_posterior_subgroup.csv.gz")
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
# plot raw vs adjusted distributions, overlaid - for fig S15
k_dist_28_365 <- comparative_locus_ridgesplot(posterior1=kbayes$global_post,
posterior2=kbayes_365$global_post,
ranks=kbayes$locus_rank,
lines_every=10, xlim=c(0,12), xbreaks=seq(0,12,1),
maxRank=30, type1="28 days", type2="365 days", legend_title="Cluster threshold", scale=2, title="e) Modelled K estimates - posterior")
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.0752
## Warning: Removed 72 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

# read Bayesian estimates for O adjusted using 365 days
obayes_365 <- parseModelledEstimates(global_post="outputs_core/O_Full_ALL_adj_365_posterior_global.csv.gz", region_post="outputs_core/O_Full_ALL_adj_365_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
# plot raw vs adjusted distributions, overlaid - for fig S15
o_dist_28_365 <- comparative_locus_ridgesplot(posterior1=obayes$global_post,
posterior2=obayes_365$global_post,
ranks=obayes$locus_rank,
lines_every=5, xlim=c(0,30), xbreaks=seq(0,30,2),
maxRank=15, type1="28 days", type2="365 days", legend_title="Cluster threshold", scale=2, title="f) Modelled O estimates - posterior")
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.0953
## Warning: Removed 265 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

Scatter plots for K mean and rank
adj_28_vs_365 <- full_join(kbayes$global_est %>% arrange(-mean) %>% ungroup %>% mutate(rank=row_number()),
kbayes_365$global_est %>% arrange(-mean) %>% ungroup() %>% mutate(rank=row_number()),
by="locus", suffix=c(".28", ".365"))
timeThreshold_estimates_adj <- adj_28_vs_365 %>%
ggplot(aes(x=mean.28, y=mean.365)) +
geom_abline(intercept=0, slope=1, col="grey") +
geom_point() + theme_bw() +
labs(x="28-day threshold", y="365-day threshold", title="c) Mean K estimates")
timeThreshold_ranks_adj <- adj_28_vs_365 %>%
ggplot(aes(x=rank.28, y=rank.365)) +
geom_abline(intercept=0, slope=1, col="grey") +
geom_point() + theme_bw() +
labs(x="28-day threshold", y="365-day threshold", title="a) Modelled K ranks")
timeThreshold_estimates_adj + timeThreshold_ranks_adj

Scatter plots for O mean and rank
adj_28_vs_365_o <- full_join(obayes$global_est %>% arrange(-mean) %>% ungroup %>% mutate(rank=row_number()),
obayes_365$global_est %>% arrange(-mean) %>% ungroup() %>% mutate(rank=row_number()),
by="locus", suffix=c(".28", ".365"))
timeThreshold_estimates_adj_o <- adj_28_vs_365_o %>%
ggplot(aes(x=mean.28, y=mean.365)) +
geom_abline(intercept=0, slope=1, col="grey") +
geom_point() + theme_bw() +
labs(x="28-day threshold", y="365-day threshold", title="d) Mean O estimates")
timeThreshold_ranks_adj_o <- adj_28_vs_365_o %>%
ggplot(aes(x=rank.28, y=rank.365)) +
geom_abline(intercept=0, slope=1, col="grey") +
geom_point() + theme_bw() +
labs(x="28-day threshold", y="365-day threshold", title="b) Modelled O ranks")
timeThreshold_estimates_adj_o + timeThreshold_ranks_adj_o

Appendix Fig S2.5
(timeThreshold_ranks_adj + timeThreshold_ranks_adj_o) / (timeThreshold_estimates_adj + timeThreshold_estimates_adj_o) / (k_dist_28_365 + o_dist_28_365 + patchwork::plot_layout(guides="collect")) + patchwork::plot_layout(heights=c(1,1,2)) & theme(legend.position='bottom')
## Picking joint bandwidth of 0.0752
## Warning: Removed 72 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.0953
## Warning: Removed 265 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

ggsave("figures/AppendixFigS2.5_temporalThresholdModelled.pdf", width=8, height=10, device = cairo_pdf, family = "Arial Unicode MS")
## Picking joint bandwidth of 0.0752
## Warning: Removed 72 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.0953
## Warning: Removed 265 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
ggsave("figures/AppendixFigS2.5_temporalThresholdModelled.png", width=8, height=10)
## Picking joint bandwidth of 0.0752
## Warning: Removed 72 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.0953
## Warning: Removed 265 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).